SickRage/lib/html5lib/treebuilders/etree_lxml.py

import warnings
import re

import _base
from html5lib.constants import DataLossWarning
import html5lib.constants as constants
import etree as etree_builders
from html5lib import ihatexml

# lxml is treated as an optional dependency: a missing lxml does not make
# importing this module fail. In that case the name ``etree`` is left
# undefined, so the code below that references it raises NameError on use.
try:
    import lxml.etree as etree
except ImportError:
    pass

# Read by TreeBuilder.getDocument(): when true the whole lxml ElementTree is
# returned, otherwise only its root element.
fullTree = True
# Splits a name of the form "{namespace}localname" into its two parts.
tag_regexp = re.compile("{([^}]*)}(.*)")

"""Module for supporting the lxml.etree library. The idea here is to use as much
of the native library as possible, without using fragile hacks like custom element
names that break between releases. The downside of this is that we cannot represent
all possible trees; specifically the following are known to cause problems:

Text or comments as siblings of the root element
Doctypes with no name

When any of these things occur, we emit a DataLossWarning
"""

class DocumentType(object):
    """Plain record holding the three parts of a doctype declaration."""

    def __init__(self, name, publicId, systemId):
        # The values are stored exactly as given; nothing is validated here.
        self.name, self.publicId, self.systemId = name, publicId, systemId

class Document(object):
    """Document node wrapping an lxml ElementTree.

    ``_elementTree`` starts as None and is expected to be assigned by the
    tree builder before ``appendChild`` is used. ``_childNodes`` is the list
    exposed through the read-only ``childNodes`` property.
    """

    def __init__(self):
        self._elementTree = None
        self._childNodes = []

    def appendChild(self, element):
        """Add ``element`` after everything already following the root.

        ``element`` is a wrapper object whose ``_element`` attribute is the
        underlying lxml node. The node is attached as a sibling of the root
        element, placed after the last existing following sibling so that
        repeated calls keep insertion order (calling ``addnext`` on the root
        every time would put each new node *before* the previous ones).
        """
        anchor = self._elementTree.getroot()
        for sibling in anchor.itersiblings():
            anchor = sibling
        anchor.addnext(element._element)

    def _getChildNodes(self):
        """Return the list of child nodes recorded for this document."""
        return self._childNodes

    childNodes = property(_getChildNodes)

def testSerializer(element):
    """Serialize a tree into an indented, one-node-per-line text dump.

    ``element`` may be:

    * an lxml ElementTree (anything with ``getroot`` but no ``tag``), dumped
      under a ``#document`` header together with its doctype and every
      root-level sibling of the root element;
    * a fragment, i.e. an iterable of elements and/or plain strings, dumped
      under a ``#document-fragment`` header;
    * a single element or comment.

    Returns the lines joined with newlines. Every line other than the two
    headers starts with ``|`` followed by the indentation.
    """
    rv = []
    # Named so as not to shadow the ``filter`` builtin.
    infosetFilter = ihatexml.InfosetFilter()

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                #Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id or
                            element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">"""%(
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s"%(' '*(indent+2), dtd_str))
                # Rewind to the first root-level node, then walk forward
                # over all of them.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent+2)
                    next_element = next_element.getnext()
            elif isinstance(element, basestring):
                #Text in a fragment
                rv.append("|%s\"%s\""%(' '*indent, element))
            else:
                #Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent+2)
        elif type(element.tag) == type(etree.Comment):
            # The tag is compared by type against the etree.Comment factory
            # rather than being a string.
            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
        else:
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                # Raises KeyError for a namespace missing from
                # constants.prefixes.
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>"%(' '*indent, prefix,
                                        infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>"%(' '*indent,
                                     infosetFilter.fromXmlName(element.tag)))

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.iteritems():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s"%(prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Sorted so the dump does not depend on attribute order.
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))

            if element.text:
                rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
            indent += 2
            # Iterating an element yields its children; getchildren() is
            # deprecated in lxml.
            for child in element:
                serializeElement(child, indent)
        if hasattr(element, "tail") and element.tail:
            rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
    serializeElement(element, 0)

    return "\n".join(rv)

def tostring(element):
    """Serialize an element and its child nodes to a string.

    ``element`` is either an lxml ElementTree (no ``tag`` attribute; its
    doctype, if any, is written first and then its root element) or a single
    element/comment. Text, attribute values and tag names are written as-is,
    with no escaping.
    """
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif type(element.tag) == type(etree.Comment):
            rv.append("<!--%s-->"%(element.text,))

        else:
            #This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>"%(element.tag,))
            else:
                attr = " ".join(["%s=\"%s\""%(name, value)
                                 for name, value in element.attrib.iteritems()])
                rv.append("<%s %s>"%(element.tag, attr))
            if element.text:
                rv.append(element.text)

            # Iterating an element yields its children; getchildren() is
            # deprecated in lxml.
            for child in element:
                serializeElement(child)

            rv.append("</%s>"%(element.tag,))

        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
        

class TreeBuilder(_base.TreeBuilder):
    """Tree builder that produces an lxml.etree tree.

    Element and comment node classes are created per instance in
    ``__init__`` because they close over that instance's InfosetFilter,
    which coerces names, text and comment data before they are handed to
    lxml.
    """
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, namespaceHTMLElements, fullTree = False):
        """Build the per-instance node classes and initialise the base class.

        ``namespaceHTMLElements`` is stored on the instance and passed on to
        the base-class constructor. ``fullTree`` is forwarded to
        ``etree_builders.getETreeModule``.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        # Local alias avoids shadowing the ``filter`` builtin; the public
        # attribute name ``self.filter`` is unchanged.
        infosetFilter = self.filter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def lxmlAttributeName(key):
            """Map an attribute key to the name used in the lxml attrib."""
            if isinstance(key, tuple):
                # Tuple keys carry the namespace at index 2 and the local
                # name at index 1.
                return "{%s}%s"%(key[2], infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """Dict of attributes that mirrors every entry into lxml."""

            def __init__(self, element, value=None):
                # None instead of a shared mutable ``{}`` default.
                if value is None:
                    value = {}
                self._element = element
                dict.__init__(self, value)
                # dict.__init__ does not go through __setitem__, so the
                # initial entries are copied to the lxml element explicitly.
                for key, item in self.iteritems():
                    self._element._element.attrib[lxmlAttributeName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[lxmlAttributeName(key)] = value

        class Element(builder.Element):
            """Element wrapper that coerces names and text for lxml."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                # Undo the coercion so callers see the original name.
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)


        class Comment(builder.Comment):
            """Comment wrapper that coerces its data for lxml."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above. Assigning builder.Comment
        # here left that subclass unused and comment data uncoerced.
        self.commentClass = Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset builder state for a new parse.

        Until the root element exists, comments are only collected (see
        ``insertCommentInitial``); ``insertRoot`` restores normal handling.
        """
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Return the module-level ``testSerializer`` dump of ``element``."""
        return testSerializer(element)

    def getDocument(self):
        """Return the lxml ElementTree, or its root element.

        NOTE(review): the choice is made by the module-level ``fullTree``
        flag, not by the ``fullTree`` argument given to ``__init__`` --
        confirm whether that is intended.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the first open element's content as a flat list.

        The list holds the element's leading text (if any), its child
        elements, and its tail text (if any).
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        # list(element) yields the children; getchildren() is deprecated in
        # lxml.
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype from ``token`` for later use by ``insertRoot``.

        Emits a DataLossWarning when the name is empty, matches
        ``ihatexml.nonXmlNameBMPRegexp`` or starts with a double quote. The
        doctype is still stored in that case.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name or ihatexml.nonXmlNameBMPRegexp.search(name) or name[0] == '"':
            warnings.warn("lxml cannot represent null or non-xml doctype", DataLossWarning)

        doctype = self.doctypeClass(name, publicId, systemId)
        self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Queue a comment seen before the root element exists.

        ``data`` is the comment token; ``parent`` is accepted but unused.
        """
        self.initial_comments.append(data)

    def insertRoot(self, token):
        """Create the document root"""
        #Because of the way libxml2 works, it doesn't seem to be possible to
        #alter information like the doctype after the tree has been parsed.
        #Therefore we need to use the built-in parser to create our initial
        #tree, after which we can add elements like normal
        docStr = ""
        if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):
            docStr += "<!DOCTYPE %s"%self.doctype.name
            if (self.doctype.publicId is not None or
                self.doctype.systemId is not None):
                docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",
                                               self.doctype.systemId or "")
            docStr += ">"
        # Placeholder root; its tag is replaced with the real name below.
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"

        try:
            root = etree.fromstring(docStr)
        except etree.XMLSyntaxError:
            # Show the offending document string before re-raising. The
            # parenthesised form prints the same text for a single argument.
            print(docStr)
            raise

        #Append the initial comments:
        for comment_token in self.initial_comments:
            # Coerce the data first, as the Comment node class does, so
            # that data lxml cannot represent goes through the filter
            # instead of straight into etree.Comment.
            comment_data = self.filter.coerceComment(comment_token["data"])
            root.addprevious(etree.Comment(comment_data))

        #Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s"%(namespace, name)
        root.tag = etree_tag

        #Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        # Point the wrapper at the already-parsed root.
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        #Reset to the default insert comment function
        self.insertComment = super(TreeBuilder, self).insertComment
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search for and gather its series data, allowing you to access and download shows that you couldn't before because you were locked into only what TheTVDB had to offer. This edition is also based on the code we used in our XEM edition, so it comes with scene numbering support as well as all the other features our XEM edition has to offer. Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files such as cache.db and failed.db if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`import warnings`
			`import re`

			`import _base`
			`from html5lib.constants import DataLossWarning`
			`import html5lib.constants as constants`
			`import etree as etree_builders`
			`from html5lib import ihatexml`

			`try:`
			`import lxml.etree as etree`
			`except ImportError:`
			`pass`

			`fullTree = True`
			`tag_regexp = re.compile("{([^}]*)}(.*)")`

			`"""Module for supporting the lxml.etree library. The idea here is to use as much`
			`of the native library as possible, without using fragile hacks like custom element`
			`names that break between releases. The downside of this is that we cannot represent`
			`all possible trees; specifically the following are known to cause problems:`

			`Text or comments as siblings of the root element`
			`Doctypes with no name`

			`When any of these things occur, we emit a DataLossWarning`
			`"""`

			`class DocumentType(object):`
			`def __init__(self, name, publicId, systemId):`
			`self.name = name`
			`self.publicId = publicId`
			`self.systemId = systemId`

			`class Document(object):`
			`def __init__(self):`
			`self._elementTree = None`
			`self._childNodes = []`

			`def appendChild(self, element):`
			`self._elementTree.getroot().addnext(element._element)`

			`def _getChildNodes(self):`
			`return self._childNodes`

			`childNodes = property(_getChildNodes)`

			`def testSerializer(element):`
			`rv = []`
			`finalText = None`
			`filter = ihatexml.InfosetFilter()`
			`def serializeElement(element, indent=0):`
			`if not hasattr(element, "tag"):`
			`if hasattr(element, "getroot"):`
			`#Full tree case`
			`rv.append("#document")`
			`if element.docinfo.internalDTD:`
			`if not (element.docinfo.public_id or`
			`element.docinfo.system_url):`
			`dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name`
			`else:`
			`dtd_str = """<!DOCTYPE %s "%s" "%s">"""%(`
			`element.docinfo.root_name,`
			`element.docinfo.public_id,`
			`element.docinfo.system_url)`
			`rv.append("\|%s%s"%(' '*(indent+2), dtd_str))`
			`next_element = element.getroot()`
			`while next_element.getprevious() is not None:`
			`next_element = next_element.getprevious()`
			`while next_element is not None:`
			`serializeElement(next_element, indent+2)`
			`next_element = next_element.getnext()`
			`elif isinstance(element, basestring):`
			`#Text in a fragment`
			`rv.append("\|%s\"%s\""%(' '*indent, element))`
			`else:`
			`#Fragment case`
			`rv.append("#document-fragment")`
			`for next_element in element:`
			`serializeElement(next_element, indent+2)`
			`elif type(element.tag) == type(etree.Comment):`
			`rv.append("\|%s<!-- %s -->"%(' '*indent, element.text))`
			`else:`
			`nsmatch = etree_builders.tag_regexp.match(element.tag)`
			`if nsmatch is not None:`
			`ns = nsmatch.group(1)`
			`tag = nsmatch.group(2)`
			`prefix = constants.prefixes[ns]`
			`rv.append("\|%s<%s %s>"%(' '*indent, prefix,`
			`filter.fromXmlName(tag)))`
			`else:`
			`rv.append("\|%s<%s>"%(' '*indent,`
			`filter.fromXmlName(element.tag)))`

			`if hasattr(element, "attrib"):`
			`attributes = []`
			`for name, value in element.attrib.iteritems():`
			`nsmatch = tag_regexp.match(name)`
			`if nsmatch is not None:`
			`ns, name = nsmatch.groups()`
			`name = filter.fromXmlName(name)`
			`prefix = constants.prefixes[ns]`
			`attr_string = "%s %s"%(prefix, name)`
			`else:`
			`attr_string = filter.fromXmlName(name)`
			`attributes.append((attr_string, value))`

			`for name, value in sorted(attributes):`
			`rv.append('\|%s%s="%s"' % (' '*(indent+2), name, value))`

			`if element.text:`
			`rv.append("\|%s\"%s\"" %(' '*(indent+2), element.text))`
			`indent += 2`
			`for child in element.getchildren():`
			`serializeElement(child, indent)`
			`if hasattr(element, "tail") and element.tail:`
			`rv.append("\|%s\"%s\"" %(' '*(indent-2), element.tail))`
			`serializeElement(element, 0)`

			`if finalText is not None:`
			`rv.append("\|%s\"%s\""%(' '*2, finalText))`

			`return "\n".join(rv)`

			`def tostring(element):`
			`"""Serialize an element and its child nodes to a string"""`
			`rv = []`
			`finalText = None`
			`def serializeElement(element):`
			`if not hasattr(element, "tag"):`
			`if element.docinfo.internalDTD:`
			`if element.docinfo.doctype:`
			`dtd_str = element.docinfo.doctype`
			`else:`
			`dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name`
			`rv.append(dtd_str)`
			`serializeElement(element.getroot())`

			`elif type(element.tag) == type(etree.Comment):`
			`rv.append("<!--%s-->"%(element.text,))`

			`else:`
			`#This is assumed to be an ordinary element`
			`if not element.attrib:`
			`rv.append("<%s>"%(element.tag,))`
			`else:`
			`attr = " ".join(["%s=\"%s\""%(name, value)`
			`for name, value in element.attrib.iteritems()])`
			`rv.append("<%s %s>"%(element.tag, attr))`
			`if element.text:`
			`rv.append(element.text)`

			`for child in element.getchildren():`
			`serializeElement(child)`

			`rv.append("</%s>"%(element.tag,))`

			`if hasattr(element, "tail") and element.tail:`
			`rv.append(element.tail)`

			`serializeElement(element)`

			`if finalText is not None:`
			`rv.append("%s\""%(' '*2, finalText))`

			`return "".join(rv)`


			`class TreeBuilder(_base.TreeBuilder):`
			`documentClass = Document`
			`doctypeClass = DocumentType`
			`elementClass = None`
			`commentClass = None`
			`fragmentClass = Document`

			`def __init__(self, namespaceHTMLElements, fullTree = False):`
			`builder = etree_builders.getETreeModule(etree, fullTree=fullTree)`
			`filter = self.filter = ihatexml.InfosetFilter()`
			`self.namespaceHTMLElements = namespaceHTMLElements`

			`class Attributes(dict):`
			`def __init__(self, element, value={}):`
			`self._element = element`
			`dict.__init__(self, value)`
			`for key, value in self.iteritems():`
			`if isinstance(key, tuple):`
			`name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))`
			`else:`
			`name = filter.coerceAttribute(key)`
			`self._element._element.attrib[name] = value`

			`def __setitem__(self, key, value):`
			`dict.__setitem__(self, key, value)`
			`if isinstance(key, tuple):`
			`name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))`
			`else:`
			`name = filter.coerceAttribute(key)`
			`self._element._element.attrib[name] = value`

			`class Element(builder.Element):`
			`def __init__(self, name, namespace):`
			`name = filter.coerceElement(name)`
			`builder.Element.__init__(self, name, namespace=namespace)`
			`self._attributes = Attributes(self)`

			`def _setName(self, name):`
			`self._name = filter.coerceElement(name)`
			`self._element.tag = self._getETreeTag(`
			`self._name, self._namespace)`

			`def _getName(self):`
			`return filter.fromXmlName(self._name)`

			`name = property(_getName, _setName)`

			`def _getAttributes(self):`
			`return self._attributes`

			`def _setAttributes(self, attributes):`
			`self._attributes = Attributes(self, attributes)`

			`attributes = property(_getAttributes, _setAttributes)`

			`def insertText(self, data, insertBefore=None):`
			`data = filter.coerceCharacters(data)`
			`builder.Element.insertText(self, data, insertBefore)`

			`def appendChild(self, child):`
			`builder.Element.appendChild(self, child)`


			`class Comment(builder.Comment):`
			`def __init__(self, data):`
			`data = filter.coerceComment(data)`
			`builder.Comment.__init__(self, data)`

			`def _setData(self, data):`
			`data = filter.coerceComment(data)`
			`self._element.text = data`

			`def _getData(self):`
			`return self._element.text`

			`data = property(_getData, _setData)`

			`self.elementClass = Element`
			`self.commentClass = builder.Comment`
			`#self.fragmentClass = builder.DocumentFragment`
			`_base.TreeBuilder.__init__(self, namespaceHTMLElements)`

			`def reset(self):`
			`_base.TreeBuilder.reset(self)`
			`self.insertComment = self.insertCommentInitial`
			`self.initial_comments = []`
			`self.doctype = None`

			`def testSerializer(self, element):`
			`return testSerializer(element)`

			`def getDocument(self):`
			`if fullTree:`
			`return self.document._elementTree`
			`else:`
			`return self.document._elementTree.getroot()`

			`def getFragment(self):`
			`fragment = []`
			`element = self.openElements[0]._element`
			`if element.text:`
			`fragment.append(element.text)`
			`fragment.extend(element.getchildren())`
			`if element.tail:`
			`fragment.append(element.tail)`
			`return fragment`

			`def insertDoctype(self, token):`
			`name = token["name"]`
			`publicId = token["publicId"]`
			`systemId = token["systemId"]`

			`if not name or ihatexml.nonXmlNameBMPRegexp.search(name) or name[0] == '"':`
			`warnings.warn("lxml cannot represent null or non-xml doctype", DataLossWarning)`

			`doctype = self.doctypeClass(name, publicId, systemId)`
			`self.doctype = doctype`

			`def insertCommentInitial(self, data, parent=None):`
			`self.initial_comments.append(data)`

			`def insertRoot(self, token):`
			`"""Create the document root"""`
			`#Because of the way libxml2 works, it doesn't seem to be possible to`
			`#alter information like the doctype after the tree has been parsed.`
			`#Therefore we need to use the built-in parser to create our iniial`
			`#tree, after which we can add elements like normal`
			`docStr = ""`
			`if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):`
			`docStr += "<!DOCTYPE %s"%self.doctype.name`
			`if (self.doctype.publicId is not None or`
			`self.doctype.systemId is not None):`
			`docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",`
			`self.doctype.systemId or "")`
			`docStr += ">"`
			`docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"`

			`try:`
			`root = etree.fromstring(docStr)`
			`except etree.XMLSyntaxError:`
			`print docStr`
			`raise`

			`#Append the initial comments:`
			`for comment_token in self.initial_comments:`
			`root.addprevious(etree.Comment(comment_token["data"]))`

			`#Create the root document and add the ElementTree to it`
			`self.document = self.documentClass()`
			`self.document._elementTree = root.getroottree()`

			`# Give the root element the right name`
			`name = token["name"]`
			`namespace = token.get("namespace", self.defaultNamespace)`
			`if namespace is None:`
			`etree_tag = name`
			`else:`
			`etree_tag = "{%s}%s"%(namespace, name)`
			`root.tag = etree_tag`

			`#Add the root element to the internal child/open data structures`
			`root_element = self.elementClass(name, namespace)`
			`root_element._element = root`
			`self.document._childNodes.append(root_element)`
			`self.openElements.append(root_element)`

			`#Reset to the default insert comment function`
			`self.insertComment = super(TreeBuilder, self).insertComment`