mirror of
https://github.com/moparisthebest/SickRage
synced 2025-01-05 10:58:01 -05:00
141 lines
4.9 KiB
Python
141 lines
4.9 KiB
Python
|
import os
|
||
|
import sys
|
||
|
import traceback
|
||
|
import StringIO
|
||
|
import warnings
|
||
|
import re
|
||
|
|
||
|
#Turn every warning into a raised exception. runParserTest catches
#constants.DataLossWarning with ``except``, which presumably relies on this
#filter being in effect - confirm before removing it.
warnings.simplefilter("error")
|
||
|
|
||
|
from support import html5lib_test_files as data_files
|
||
|
from support import TestData, convert, convertExpected
|
||
|
import html5lib
|
||
|
from html5lib import html5parser, treebuilders, constants
|
||
|
|
||
|
#Registry of the tree builders exercised by test_parser, keyed by a short
#display name. These two are registered unconditionally.
treeTypes = {
    "simpletree": treebuilders.getTreeBuilder("simpletree"),
    "DOM": treebuilders.getTreeBuilder("dom"),
}

#Try whatever etree implementations are available from a list that are
#"supposed" to work. Each one is optional: a failed import just means that
#builder is not registered.
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
        "etree", ElementTree, fullTree=True)
except ImportError:
    #Fall back to the standalone ``elementtree`` package
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
            "etree", ElementTree, fullTree=True)
    except ImportError:
        pass

try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
        "etree", cElementTree, fullTree=True)
except ImportError:
    #Fall back to the standalone ``cElementTree`` package
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
            "etree", cElementTree, fullTree=True)
    except ImportError:
        pass

try:
    #Prefer lxml.html and fall back to lxml.etree; if neither imports, the
    #outer handler skips the registration.
    try:
        import lxml.html as lxml
    except ImportError:
        import lxml.etree as lxml
    treeTypes['lxml'] = treebuilders.getTreeBuilder(
        "lxml", lxml, fullTree=True)
except ImportError:
    pass

try:
    #The module object is not passed to getTreeBuilder; the import only
    #gates the registration.
    import BeautifulSoup
    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder(
        "beautifulsoup", fullTree=True)
except ImportError:
    pass

#Try whatever dom implementations are available from a list that are
#"supposed" to work
try:
    import pxdom
    treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
except ImportError:
    pass

#When true, runParserTest also asserts that the number of parse errors
#reported by the parser equals the number of expected errors.
checkParseErrors = False
|
||
|
|
||
|
#XXX - There should just be one function here but for some reason the testcase
#format differs from the treedump format by a single space character
def convertTreeDump(data):
    """Convert a serialized tree dump and drop its first line.

    ``data`` is passed through the converter returned by ``convert(3)``;
    everything up to and including the first newline of that result is
    discarded. If the converted text contains no newline the result is an
    empty string.
    """
    converted = convert(3)(data)
    #partition splits on the first newline only; the tail is everything
    #after it (or "" when there is no newline at all).
    _firstLine, _newline, remainder = converted.partition("\n")
    return remainder
|
||
|
|
||
|
#Bound ``sub`` method of a multiline pattern matching, at the start of a
#line, optional whitespace followed by ``<name>`` where name contains no
#whitespace. Group 1 is the leading whitespace, group 2 the name.
_elementLine = re.compile(r"^(\s*)<(\S+)>", re.MULTILINE)
namespaceExpected = _elementLine.sub
|
||
|
|
||
|
|
||
|
def _formatParseErrors(parseErrors):
    """Render parser error records as a list of human-readable lines.

    Each record is unpacked as ``((line, col), errorcode, datavars)``.
    """
    rendered = []
    for (line, col), errorcode, datavars in parseErrors:
        if isinstance(datavars, dict):
            detail = constants.E[errorcode] % datavars
        else:
            #NOTE(review): this mirrors the precedence of the original
            #one-line conditional expression, where a non-dict datavars
            #yields the bare tuple instead of a formatted message. That
            #looks unintended - confirm what the fallback should be.
            detail = (datavars,)
        rendered.append(u"Line: %i Col: %i %s" % (line, col, detail))
    return rendered


def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert the serialized tree matches ``expected``.

    innerHTML -- when truthy, ``input`` is parsed as a fragment with this
        value passed as the second argument to ``parseFragment``;
        otherwise ``input`` is parsed as a whole document.
    input -- the markup to parse.
    expected -- the expected tree dump; it is run through
        ``convertExpected`` before comparison.
    errors -- the expected parse errors (a sequence of strings, or a falsy
        value when there are none). Only consulted when the module-level
        ``checkParseErrors`` flag is true.
    treeClass -- the tree builder handed to ``html5parser.HTMLParser``.
    namespaceHTMLElements -- forwarded to the parser; when truthy the
        expected dump is rewritten so that ``<name>`` lines become
        ``<html name>``.

    Returns None without asserting anything if constructing the parser or
    parsing a whole document raises ``constants.DataLossWarning``. Any
    other exception raised while parsing is reported as an assertion
    failure carrying the input, the expected output and the traceback.
    """
    #XXX - move this out into the setup function
    #concatenate all consecutive character tokens into a single token
    try:
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        #Exception rather than a bare except, so KeyboardInterrupt and
        #SystemExit propagate instead of becoming a test failure.
        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                               u"\nTraceback:", traceback.format_exc()])
        assert False, errorMsg.encode("utf8")

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                           u"\nReceived:", output])
    assert expected == output, errorMsg.encode("utf8")

    #Only build the error report when it is going to be checked, so that a
    #falsy ``errors`` value or an unformattable error record cannot fail a
    #test whose tree comparison already passed.
    if checkParseErrors:
        expectedErrors = errors or []
        errStr = _formatParseErrors(p.errors)
        errorMsg2 = u"\n".join([u"\n\nInput:", input,
                                u"\nExpected errors (" + str(len(expectedErrors)) + u"):\n" + u"\n".join(expectedErrors),
                                u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
        assert len(p.errors) == len(expectedErrors), errorMsg2.encode("utf-8")
|
||
|
|
||
|
def test_parser():
    """Generate parser test cases from the 'tree-construction' data files.

    Writes the names of the registered tree builders to stderr, then for
    every test in every data file yields a tuple consisting of
    ``runParserTest`` followed by the arguments it should be called with.
    """
    sys.stderr.write("Testing tree builders %s\n" % " ".join(treeTypes.keys()))

    for filename in data_files('tree-construction'):
        #Base name of the data file without ".dat" (not used further below)
        testName = os.path.basename(filename).replace(".dat","")

        for test in TestData(filename, "data"):
            markup = test['data']
            errors = test['errors']
            innerHTML = test['document-fragment']
            expected = test['document']

            #Expected errors arrive as one newline-separated string
            if errors:
                errors = errors.split("\n")

            for treeName, treeCls in treeTypes.iteritems():
                for namespaceHTMLElements in (True, False):
                    print(markup)
                    yield (runParserTest, innerHTML, markup, expected,
                           errors, treeCls, namespaceHTMLElements)
                    #NOTE(review): the original indentation was lost; this
                    #break is assumed to belong to the innermost loop, which
                    #means only namespaceHTMLElements=True is exercised -
                    #confirm against the upstream file.
                    break
|
||
|
|
||
|
|