"""Shared helpers for the html5lib test-suite.

Locates the test data directory, builds the ``treeTypes`` registry of
tree builders that can be imported in this environment, and provides
``TestData`` for reading the sectioned ``.dat`` test files.
"""
import os
import sys
import codecs
import glob

base_path = os.path.split(__file__)[0]

if os.path.exists(os.path.join(base_path, 'testdata')):
    # release: the test data sits next to this module
    test_dir = os.path.join(base_path, 'testdata')
else:
    # development: the test data lives three directories up
    # NOTE(review): the assert and the sys.path tweak are assumed to belong
    # to the development branch only -- confirm against the project layout.
    test_dir = os.path.abspath(
        os.path.join(base_path,
                     os.path.pardir, os.path.pardir, os.path.pardir,
                     'testdata'))
    assert os.path.exists(test_dir), "Test data not found"
    # import the development html5lib
    sys.path.insert(0, os.path.abspath(os.path.join(base_path,
                                                    os.path.pardir,
                                                    os.path.pardir)))

import html5lib
from html5lib import html5parser, treebuilders
del base_path

# Build a dict of available trees
treeTypes = {"simpletree": treebuilders.getTreeBuilder("simpletree"),
             "DOM": treebuilders.getTreeBuilder("dom")}

# Try whatever etree implementations are available from a list that are
# "supposed" to work
try:
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
        "etree", ElementTree, fullTree=True)
except ImportError:
    try:
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder(
            "etree", ElementTree, fullTree=True)
    except ImportError:
        pass

try:
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
        "etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder(
            "etree", cElementTree, fullTree=True)
    except ImportError:
        pass

try:
    import lxml.etree as lxml
    treeTypes['lxml'] = treebuilders.getTreeBuilder(
        "etree", lxml, fullTree=True)
except ImportError:
    pass

try:
    import BeautifulSoup
    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder(
        "beautifulsoup", fullTree=True)
except ImportError:
    pass


def html5lib_test_files(subdirectory, files='*.dat'):
    """Return the paths inside ``test_dir/subdirectory`` that match the
    glob pattern ``files``."""
    return glob.glob(os.path.join(test_dir, subdirectory, files))


class DefaultDict(dict):
    """A dict whose item lookup returns a fixed default for missing keys.

    Unlike ``collections.defaultdict`` a failed lookup does not insert the
    key, so an instance that was never assigned to stays empty (and falsy).
    """

    def __init__(self, default, *args, **kwargs):
        self.default = default
        dict.__init__(self, *args, **kwargs)

    def __getitem__(self, key):
        return dict.get(self, key, self.default)


class TestData(object):
    """Iterate over the tests stored in a sectioned test data file.

    A line starting with ``#`` opens a section named by the rest of that
    line; the lines that follow are the section's content.  A section
    named ``newTestHeading`` marks the start of a new test.  Each test is
    yielded as a ``DefaultDict`` mapping section name to content, where
    sections absent from the test read as ``None``.
    """

    def __init__(self, filename, newTestHeading="data"):
        self.f = codecs.open(filename, encoding="utf8")
        self.newTestHeading = newTestHeading

    def __iter__(self):
        """Yield one ``DefaultDict`` per test in the file.

        The underlying file is closed once it has been read to the end;
        iterating again afterwards yields nothing.
        """
        if self.f.closed:
            # An earlier pass consumed and closed the file; behave like an
            # exhausted file instead of reading from a closed handle.
            return
        data = DefaultDict(None)
        key = None
        for line in self.f:
            heading = self.isSectionHeading(line)
            if heading:
                if data and heading == self.newTestHeading:
                    # Drop one trailing character from the last section of
                    # the finished test; normaliseOutput then strips one
                    # further trailing newline from every section.
                    data[key] = data[key][:-1]
                    yield self.normaliseOutput(data)
                    data = DefaultDict(None)
                key = heading
                data[key] = ""
            elif key is not None:
                data[key] += line
        # Everything has been read: release the file handle before handing
        # out the final test so it is not leaked if the consumer stops here.
        self.f.close()
        if data:
            yield self.normaliseOutput(data)

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the
        heading, otherwise return False.

        A bare ``#`` gives an empty heading, which callers testing the
        result for truth treat like a non-heading line.
        """
        if line.startswith("#"):
            return line[1:].strip()
        else:
            return False

    def normaliseOutput(self, data):
        """Strip a single trailing newline from every value of ``data``.

        ``data`` is modified in place and returned.
        """
        # Iterate over a snapshot: values are reassigned inside the loop,
        # and items() (unlike iteritems()) exists on Python 2 and 3.
        for key, value in list(data.items()):
            if value.endswith("\n"):
                data[key] = value[:-1]
        return data


def convert(stripChars):
    """Return a function that removes the first ``stripChars`` characters
    from every line starting with ``|`` and leaves other lines untouched."""
    def convertData(data):
        """convert the output of str(document) to the format used in the
        testcases"""
        rv = []
        for line in data.split("\n"):
            if line.startswith("|"):
                rv.append(line[stripChars:])
            else:
                rv.append(line)
        return "\n".join(rv)
    return convertData


convertExpected = convert(2)