mirror of
https://github.com/moparisthebest/SickRage
synced 2024-11-11 03:45:01 -05:00
0d9fbc1ad7
This version of SickBeard uses both TVDB and TVRage to search and gather its series data, allowing you to access and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy!
312 lines
12 KiB
Python
312 lines
12 KiB
Python
import os
|
|
import sys
|
|
import StringIO
|
|
import unittest
|
|
import warnings
|
|
|
|
# Turn every warning into an exception from this point on.  This runs before
# the html5lib imports below, and is what lets run_test() catch
# constants.DataLossWarning with an ordinary ``except`` clause.
warnings.simplefilter("error")
|
|
|
|
from support import html5lib_test_files, TestData, convertExpected
|
|
|
|
from html5lib import html5parser, treewalkers, treebuilders, constants
|
|
from html5lib.filters.lint import Filter as LintFilter, LintError
|
|
|
|
def PullDOMAdapter(node):
    """Yield pulldom-style ``(event, node)`` pairs for the DOM subtree at *node*.

    Documents and document fragments produce no event of their own; only
    their children are visited.  Comments yield ``COMMENT``, text and CDATA
    nodes yield ``CHARACTERS``, and elements yield ``START_ELEMENT``, then
    the events of their children in order, then ``END_ELEMENT``.

    Raises:
        NotImplementedError: for DOCTYPE nodes and for any node type not
            listed above.  Because this is a generator, the error surfaces
            while iterating, not when the function is called.
    """
    from xml.dom import Node
    from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS

    node_type = node.nodeType

    if node_type in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
        # Container nodes are transparent: just recurse into the children.
        for childNode in node.childNodes:
            for event in PullDOMAdapter(childNode):
                yield event

    elif node_type == Node.DOCUMENT_TYPE_NODE:
        raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")

    elif node_type == Node.COMMENT_NODE:
        yield COMMENT, node

    elif node_type in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        yield CHARACTERS, node

    elif node_type == Node.ELEMENT_NODE:
        yield START_ELEMENT, node
        for childNode in node.childNodes:
            for event in PullDOMAdapter(childNode):
                yield event
        yield END_ELEMENT, node

    else:
        raise NotImplementedError("Node type not supported: " + str(node.nodeType))
|
|
|
|
# Registry of the tree implementations exercised by the tests in this module.
# Each entry maps a display name to:
#   "builder" - the tree builder class handed to html5parser.HTMLParser,
#   "walker"  - the tree walker whose token output is checked,
#   "adapter" - (optional) a callable applied to the parsed document before
#               it is given to the walker; when absent the document is used
#               as-is.
treeTypes = {
    "simpletree": {"builder": treebuilders.getTreeBuilder("simpletree"),
                   "walker": treewalkers.getTreeWalker("simpletree")},
    "DOM": {"builder": treebuilders.getTreeBuilder("dom"),
            "walker": treewalkers.getTreeWalker("dom")},
    "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
                "adapter": PullDOMAdapter,
                "walker": treewalkers.getTreeWalker("pulldom")},
}
|
|
|
|
# Try whatever etree implementations are available from a list that are
# "supposed" to work.  Each attempt registers itself in treeTypes only when
# the import (and the builder/walker lookup) succeeds.

# Pure-Python ElementTree: the standard-library location first, then the
# standalone ``elementtree`` package as a fallback.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = \
        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = \
            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
             "walker": treewalkers.getTreeWalker("etree", ElementTree)}
    except ImportError:
        pass

# cElementTree: again the standard-library location first, then the
# standalone ``cElementTree`` module.
try:
    import xml.etree.cElementTree as ElementTree
    treeTypes['cElementTree'] = \
        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
    try:
        import cElementTree as ElementTree
        treeTypes['cElementTree'] = \
            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
             "walker": treewalkers.getTreeWalker("etree", ElementTree)}
    except ImportError:
        pass
|
|
|
|
# lxml: the import only checks that lxml is installed; the registered entry
# uses the dedicated "lxml" builder and walker rather than the generic etree
# ones (that variant is left disabled below).
try:
    import lxml.etree as ElementTree
#    treeTypes['lxml_as_etree'] = \
#        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
#         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
    treeTypes['lxml_native'] = \
        {"builder": treebuilders.getTreeBuilder("lxml"),
         "walker": treewalkers.getTreeWalker("lxml")}
except ImportError:
    pass

# BeautifulSoup: registered only when the package can be imported.
try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = \
        {"builder": treebuilders.getTreeBuilder("beautifulsoup"),
         "walker": treewalkers.getTreeWalker("beautifulsoup")}
except ImportError:
    pass

# pxdom: when installed, it is passed to the "dom" tree builder as the DOM
# implementation and walked with the regular "dom" walker.
try:
    import pxdom
    treeTypes['pxdom'] = \
        {"builder": treebuilders.getTreeBuilder("dom", pxdom),
         "walker": treewalkers.getTreeWalker("dom")}
except ImportError:
    pass
|
|
|
|
# Genshi support is optional: the adapter is only defined when genshi can be
# imported, and its treeTypes registration is currently disabled (see the
# commented-out entry at the end of this block).
try:
    from genshi.core import QName, Attrs
    from genshi.core import START, END, TEXT, COMMENT, DOCTYPE

    def GenshiAdapter(tree):
        """Yield Genshi-style ``(kind, data, pos)`` events for a simpletree.

        *tree* is walked with the "simpletree" tree walker and each token is
        translated into the matching genshi.core event.  Runs of consecutive
        "Characters" / "SpaceCharacters" tokens are merged into one TEXT
        event.  Every event carries the placeholder position
        ``(None, -1, -1)``.  Token types other than start/empty/end tags,
        comments, doctypes and character data are silently skipped.
        """
        pos = (None, -1, -1)

        def qualified(namespace, name):
            # Spell a namespaced name as "{namespace}local"; plain otherwise.
            if namespace:
                return u"{%s}%s" % (namespace, name)
            return name

        text = None
        for token in treewalkers.getTreeWalker("simpletree")(tree):
            kind = token["type"]
            if kind in ("Characters", "SpaceCharacters"):
                # Buffer character data until a non-text token arrives.
                if text is None:
                    text = token["data"]
                else:
                    text += token["data"]
            elif text is not None:
                yield TEXT, text, pos
                text = None

            if kind in ("StartTag", "EmptyTag"):
                # token["data"] maps (namespace, name) -> value, so the pairs
                # must come from .items(); iterating the dict directly would
                # unpack each key into "name, value" instead.
                attrs = Attrs([(QName(qualified(ns, attr)), value)
                               for (ns, attr), value in token["data"].items()])
                yield (START,
                       (QName(qualified(token["namespace"], token["name"])),
                        attrs),
                       pos)
                if kind == "EmptyTag":
                    # An empty tag is emitted as START immediately followed
                    # by END, so fall through to the EndTag branch below.
                    kind = "EndTag"

            if kind == "EndTag":
                # Use the same qualified name as the START event so that the
                # two events of one element agree.
                yield (END,
                       QName(qualified(token.get("namespace"), token["name"])),
                       pos)

            elif kind == "Comment":
                yield COMMENT, token["data"], pos

            elif kind == "Doctype":
                yield DOCTYPE, (token["name"], token["publicId"],
                                token["systemId"]), pos

            else:
                pass  # FIXME: What to do?

        # Flush character data left over at the end of the token stream.
        if text is not None:
            yield TEXT, text, pos

    #treeTypes["genshi"] = \
    #    {"builder": treebuilders.getTreeBuilder("simpletree"),
    #     "adapter": GenshiAdapter,
    #     "walker": treewalkers.getTreeWalker("genshi")}
except ImportError:
    pass
|
|
|
|
def concatenateCharacterTokens(tokens):
    """Yield *tokens* with adjacent character tokens merged into one.

    Each run of consecutive tokens whose "type" is "Characters" or
    "SpaceCharacters" is replaced by a single new
    ``{"type": "Characters", "data": ...}`` token holding the concatenated
    data.  All other tokens are passed through unchanged and in order.  The
    input token dicts are never modified.
    """
    pending = None  # merged character token not yet emitted, if any
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            if pending is None:
                # Start a fresh dict so the caller's token is left untouched.
                pending = {"type": "Characters", "data": token["data"]}
            else:
                pending["data"] += token["data"]
        else:
            if pending is not None:
                yield pending
                pending = None
            yield token
    # Flush a run that reaches the end of the stream.
    if pending is not None:
        yield pending
|
|
|
|
def convertTokens(tokens):
    """Render a tree-walker token stream as an indented text dump.

    Adjacent character tokens are merged first (concatenateCharacterTokens).
    Each start/empty tag, attribute, comment, doctype and text token becomes
    one output line, indented two spaces per open element.  Elements and
    attributes in a non-HTML namespace are prefixed with the namespace's
    entry in ``constants.prefixes`` when there is one, otherwise with the
    namespace itself.  Attributes are emitted in sorted key order.  Tokens
    of any other type produce no output.

    Returns the lines joined with newlines.
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(tokens):
        kind = token["type"]
        if kind in ("StartTag", "EmptyTag"):
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                if namespace in constants.prefixes:
                    name = constants.prefixes[namespace]
                else:
                    name = namespace
                name += u" " + token["name"]
            else:
                name = token["name"]
            output.append(u"%s<%s>" % (" "*indent, name))
            # Attributes (and children) are printed one level deeper.
            indent += 2
            attrs = token["data"]
            if attrs:
                #TODO: Remove this if statement, attrs should always exist
                for (attrNamespace, attrName), value in sorted(attrs.items()):
                    if attrNamespace:
                        if attrNamespace in constants.prefixes:
                            outputname = constants.prefixes[attrNamespace]
                        else:
                            outputname = attrNamespace
                        outputname += u" " + attrName
                    else:
                        outputname = attrName
                    output.append(u"%s%s=\"%s\"" % (" "*indent, outputname, value))
            if kind == "EmptyTag":
                # No matching EndTag will follow, so close the level now.
                indent -= 2
        elif kind == "EndTag":
            indent -= 2
        elif kind == "Comment":
            output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
        elif kind == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">"""%
                                  (" "*indent, token["name"],
                                   token["publicId"],
                                   token["systemId"] or ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">"""%
                                  (" "*indent, token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>"%(" "*indent,
                                                     token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" "*indent,))
        elif kind in ("Characters", "SpaceCharacters"):
            output.append("%s\"%s\"" % (" "*indent, token["data"]))
        else:
            pass # TODO: what to do with errors?
    return u"\n".join(output)
|
|
|
|
import re
|
|
# Matches a run of two or more consecutive lines that all start with the same
# leading whitespace followed by ``name=...`` (an attribute list in the dump
# format).
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)


def sortattrs(x):
    """Return the text of regex match *x* with its lines sorted.

    Intended as the replacement callback for ``attrlist.sub`` so that
    attribute lines compare equal regardless of their original order.
    """
    return "\n".join(sorted(x.group(0).split("\n")))
|
|
|
|
|
|
class TokenTestCase(unittest.TestCase):
    """Checks the exact token stream each registered tree walker produces."""

    def test_all_tokens(self):
        """Parse one small document per tree type and compare walker tokens.

        For every entry in treeTypes the document is parsed with that entry's
        builder, passed through its adapter (if any) and walked; the tokens
        are then compared pairwise with the expected list.
        """
        expected = [
            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'},
            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
            {'data': u'a', 'type': 'Characters'},
            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
            {'data': u'b', 'type': 'Characters'},
            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
            {'data': u'c', 'type': 'Characters'},
            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'}
            ]
        for treeCls in treeTypes.values():
            p = html5parser.HTMLParser(tree = treeCls["builder"])
            document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
            # Entries without an "adapter" use the parsed document directly.
            document = treeCls.get("adapter", lambda x: x)(document)
            output = treeCls["walker"](document)
            # NOTE: zip() stops at the shorter sequence, so missing or extra
            # trailing tokens are not detected by this comparison.
            for expectedToken, outputToken in zip(expected, output):
                # assertEqual, not the deprecated assertEquals alias.
                self.assertEqual(expectedToken, outputToken)
|
|
|
|
def run_test(innerHTML, input, expected, errors, treeClass):
    """Parse *input*, walk the resulting tree and compare with *expected*.

    Args:
        innerHTML: when truthy, *input* is parsed as a fragment and this
            value is passed to ``parseFragment`` as its second argument;
            otherwise a full document is parsed.
        input: markup to parse.
        expected: expected tree dump, normalised with ``convertExpected``.
        errors: accepted for the test-generator signature; not used here.
        treeClass: a treeTypes entry providing "builder", "walker" and an
            optional "adapter".

    Raises:
        AssertionError: when the rendered walker output differs from the
            expected dump (attribute order is normalised on both sides).

    Test cases for which parsing raises ``constants.DataLossWarning`` are
    skipped, as are walkers/adapters that raise NotImplementedError.
    """
    try:
        p = html5parser.HTMLParser(tree = treeClass["builder"])
        if innerHTML:
            document = p.parseFragment(StringIO.StringIO(input), innerHTML)
        else:
            document = p.parse(StringIO.StringIO(input))
    except constants.DataLossWarning:
        #Ignore testcases we know we don't pass
        return

    # Entries without an "adapter" use the parsed document directly.
    document = treeClass.get("adapter", lambda x: x)(document)
    try:
        output = convertTokens(treeClass["walker"](document))
        # Sort attribute lines on both sides so ordering cannot cause a
        # spurious mismatch.
        output = attrlist.sub(sortattrs, output)
        expected = attrlist.sub(sortattrs, convertExpected(expected))
        assert expected == output, "\n".join([
            "", "Input:", input,
            "", "Expected:", expected,
            "", "Received:", output
            ])
    except NotImplementedError:
        pass # Amnesty for those that confess...
|
|
|
|
def test_treewalker():
    """Generate one ``run_test`` case per tree type and tree-construction test.

    Writes the names of the registered tree types to stdout, then, for every
    treeTypes entry and every test in every 'tree-construction' data file,
    yields ``(run_test, innerHTML, input, expected, errors, treeCls)`` for
    the test runner to call.
    """
    sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")

    for treeCls in treeTypes.values():
        # Looked up afresh for each tree type, as in the original loop.
        files = html5lib_test_files('tree-construction')
        for filename in files:
            tests = TestData(filename, "data")

            for test in tests:
                (input, errors,
                 innerHTML, expected) = [test[key] for key in ("data", "errors",
                                                               "document-fragment",
                                                               "document")]
                errors = errors.split("\n")
                yield run_test, innerHTML, input, expected, errors, treeCls
|
|
|
|
|