mirror of
https://github.com/moparisthebest/SickRage
synced 2025-01-05 10:58:01 -05:00
257 lines
7.3 KiB
Python
257 lines
7.3 KiB
Python
|
import _base
|
||
|
from html5lib.constants import voidElements, namespaces, prefixes
|
||
|
from xml.sax.saxutils import escape
|
||
|
|
||
|
# Really crappy basic implementation of a DOM-core like thing
|
||
|
class Node(_base.Node):
    """Base class for the nodes of this minimal DOM-like tree.

    Each node carries a ``name``, a ``parent`` link, an optional ``value``
    and an ordered list of ``childNodes``. Subclasses supply the concrete
    serialisation (``toxml``) and copying (``cloneNode``) behaviour.
    """

    type = -1

    def __init__(self, name):
        """Create a detached node called *name* with no children."""
        self.name = name
        self.parent = None
        self.value = None
        self.childNodes = []
        self._flags = []

    def __iter__(self):
        """Yield every descendant of this node, depth first."""
        for node in self.childNodes:
            yield node
            # Recurse through the child's own __iter__.
            for item in node:
                yield item

    def __unicode__(self):
        """Return the node name as its textual representation."""
        return self.name

    def toxml(self):
        """Serialise the node as markup; must be provided by subclasses."""
        raise NotImplementedError

    def printTree(self, indent=0):
        """Return an indented dump of this node and its subtree.

        Each node is written on its own line, prefixed with ``|`` and
        *indent* spaces; children are indented two further spaces.
        """
        tree = '\n|%s%s' % (' ' * indent, unicode(self))
        for child in self.childNodes:
            tree += child.printTree(indent + 2)
        return tree

    def appendChild(self, node):
        """Append *node* as the last child of this node.

        When *node* and the current last child are both text nodes, the
        text of *node* is folded into the existing child instead of adding
        a second adjacent text node.
        """
        assert isinstance(node, Node)
        if (isinstance(node, TextNode) and self.childNodes and
                isinstance(self.childNodes[-1], TextNode)):
            self.childNodes[-1].value += node.value
        else:
            self.childNodes.append(node)
        node.parent = self

    def insertText(self, data, insertBefore=None):
        """Insert the unicode string *data* as text.

        The text is appended when *insertBefore* is None, otherwise it is
        inserted in front of the child node *insertBefore*.
        """
        assert isinstance(data, unicode), "data %s is of type %s expected unicode"%(repr(data), type(data))
        if insertBefore is None:
            self.appendChild(TextNode(data))
        else:
            self.insertBefore(TextNode(data), insertBefore)

    def insertBefore(self, node, refNode):
        """Insert *node* immediately before the existing child *refNode*.

        A text node inserted directly after another text node is merged
        into that preceding node. ``list.index`` raises ValueError when
        *refNode* is not a child of this node.
        """
        index = self.childNodes.index(refNode)
        if (isinstance(node, TextNode) and index > 0 and
                isinstance(self.childNodes[index - 1], TextNode)):
            self.childNodes[index - 1].value += node.value
        else:
            self.childNodes.insert(index, node)
        node.parent = self

    def removeChild(self, node):
        """Detach *node* from this node's children.

        ``list.remove`` raises ValueError when *node* is not a child; the
        error is left to propagate and *node* keeps its parent in that case.
        """
        self.childNodes.remove(node)
        node.parent = None

    def cloneNode(self):
        """Return a copy of this node; must be provided by subclasses."""
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self.childNodes)

    def getNameTuple(self):
        """Return the ``(namespace, name)`` pair identifying this node.

        A missing or None namespace is reported as the HTML namespace.
        """
        # Only some subclasses assign ``namespace``; fall back to None so
        # the property does not raise AttributeError on the other nodes.
        namespace = getattr(self, "namespace", None)
        if namespace is None:
            return namespaces["html"], self.name
        return namespace, self.name

    nameTuple = property(getNameTuple)
|
||
|
|
||
|
class Document(Node):
    """Root node of a parsed document; it has no name of its own."""

    type = 1

    def __init__(self):
        """Create an empty document."""
        Node.__init__(self, None)

    def __str__(self):
        """Return the fixed label used for a document in tree dumps."""
        return "#document"

    def __unicode__(self):
        """Return the same label as ``__str__``."""
        return str(self)

    def appendChild(self, child):
        """Append *child* using the generic ``Node`` behaviour."""
        Node.appendChild(self, child)

    def toxml(self, encoding="utf-8"):
        """Return the children serialised as markup, encoded as *encoding*.

        The default used to be spelled ``"utf=8"``; it is now the canonical
        codec name, matching ``hilite``, so it no longer depends on how the
        codec lookup normalises names.
        """
        result = ""
        for child in self.childNodes:
            result += child.toxml()
        return result.encode(encoding)

    def hilite(self, encoding="utf-8"):
        """Return the highlighted markup of the children inside ``<pre>``.

        The opening tag and the children are encoded with *encoding*; the
        closing ``</pre>`` is appended after encoding.
        """
        result = "<pre>"
        for child in self.childNodes:
            result += child.hilite()
        return result.encode(encoding) + "</pre>"

    def printTree(self):
        """Return a dump of the tree, starting with the document label.

        Children are dumped with an indent of two spaces.
        """
        tree = unicode(self)
        for child in self.childNodes:
            tree += child.printTree(2)
        return tree

    def cloneNode(self):
        """Return a new, empty document; children are not copied."""
        return Document()
|
||
|
|
||
|
class DocumentFragment(Document):
    """Document-like container representing a fragment of a tree."""

    type = 2

    def __str__(self):
        """Return the fixed label used for a fragment in tree dumps."""
        label = "#document-fragment"
        return label

    def __unicode__(self):
        """Return the same label as ``__str__``."""
        return str(self)

    def cloneNode(self):
        """Return a new, empty fragment; children are not copied."""
        clone = DocumentFragment()
        return clone
|
||
|
|
||
|
class DocumentType(Node):
    """Doctype node holding a name plus public and system identifiers."""

    type = 3

    def __init__(self, name, publicId, systemId):
        """Create a doctype called *name* with the given identifiers."""
        Node.__init__(self, name)
        self.publicId = publicId
        self.systemId = systemId

    def __unicode__(self):
        """Return the doctype declaration as text.

        When either identifier is set, both are written as quoted strings
        and a missing one is written as an empty string. Otherwise only
        the name is written.
        """
        if self.publicId or self.systemId:
            publicId = self.publicId or ""
            systemId = self.systemId or ""
            return """<!DOCTYPE %s "%s" "%s">"""%(
                self.name, publicId, systemId)

        else:
            return u"<!DOCTYPE %s>" % self.name

    # The XML serialisation is the declaration text itself.
    toxml = __unicode__

    def hilite(self):
        """Return HTML that displays the doctype declaration.

        The leading ``<`` is written as ``&lt;`` so a browser shows the
        declaration as text instead of parsing it as markup.
        """
        return '<code class="markup doctype">&lt;!DOCTYPE %s></code>' % self.name

    def cloneNode(self):
        """Return a new doctype with the same name and identifiers."""
        return DocumentType(self.name, self.publicId, self.systemId)
|
||
|
|
||
|
class TextNode(Node):
    """Leaf node holding character data in ``value``; it has no name."""

    type = 4

    def __init__(self, value):
        """Create a text node containing *value*."""
        Node.__init__(self, None)
        self.value = value

    def __unicode__(self):
        """Return the text wrapped in double quotes, as used in tree dumps."""
        text = self.value
        return u"\"%s\"" % text

    def toxml(self):
        """Return the text passed through ``escape``."""
        text = self.value
        return escape(text)

    # Highlighted output for text is the same escaped text.
    hilite = toxml

    def cloneNode(self):
        """Return a new text node with the same value."""
        text = self.value
        return TextNode(text)
|
||
|
|
||
|
class Element(Node):
    """Element node with an optional namespace and an attribute mapping."""

    type = 5

    def __init__(self, name, namespace=None):
        """Create an element called *name* in *namespace* with no attributes."""
        Node.__init__(self, name)
        self.namespace = namespace
        self.attributes = {}

    def __unicode__(self):
        """Return the tag as shown in tree dumps.

        Elements without a namespace are written as ``<name>``; others as
        ``<prefix name>``, with the prefix looked up in ``prefixes``.
        """
        if self.namespace is None:
            return u"<%s>" % self.name
        else:
            return u"<%s %s>"%(prefixes[self.namespace], self.name)

    def toxml(self):
        """Return the element and its subtree serialised as markup.

        Attribute values are escaped, including double quotes, so a value
        cannot terminate its own attribute. Elements without children are
        written in self-closing form.
        """
        parts = ['<' + self.name]
        for name, value in self.attributes.iteritems():
            parts.append(u' %s="%s"' % (name, escape(value, {'"': '&quot;'})))
        if self.childNodes:
            parts.append('>')
            for child in self.childNodes:
                parts.append(child.toxml())
            parts.append(u'</%s>' % self.name)
        else:
            parts.append(u'/>')
        return "".join(parts)

    def hilite(self):
        """Return HTML that displays the element's markup with highlighting.

        Tag delimiters are written as ``&lt;`` so they are shown rather
        than parsed, and attribute values are escaped including double
        quotes. An element without children whose name is in
        ``voidElements`` gets no end tag.
        """
        result = '&lt;<code class="markup element-name">%s</code>' % self.name
        for name, value in self.attributes.iteritems():
            result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"': '&quot;'}))
        # Always close the start tag, including for empty non-void elements.
        result += ">"
        if not self.childNodes and self.name in voidElements:
            return result
        for child in self.childNodes:
            result += child.hilite()
        return result + '&lt;/<code class="markup element-name">%s</code>>' % self.name

    def printTree(self, indent=0):
        """Return an indented dump of the element, its attributes and subtree.

        Attributes are listed in sorted order, one per line, indented two
        spaces deeper than the element; tuple attribute names are written
        as their two parts separated by a space. *indent* defaults to 0 so
        the method can be called without arguments, as on other nodes.
        """
        tree = '\n|%s%s' % (' ' * indent, unicode(self))
        indent += 2
        for name, value in sorted(self.attributes.iteritems()):
            if isinstance(name, tuple):
                name = "%s %s"%(name[0], name[1])
            tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
        for child in self.childNodes:
            tree += child.printTree(indent)
        return tree

    def cloneNode(self):
        """Return a new element with the same name, namespace and attributes.

        Children are not copied; the attribute mapping is a new dict.
        """
        newNode = Element(self.name)
        if hasattr(self, 'namespace'):
            newNode.namespace = self.namespace
        newNode.attributes.update(self.attributes)
        return newNode
|
||
|
|
||
|
class CommentNode(Node):
    """Comment node holding its text in ``data``; it has no name."""

    type = 6

    def __init__(self, data):
        """Create a comment containing *data*."""
        Node.__init__(self, None)
        self.data = data

    def __unicode__(self):
        """Return the comment as shown in tree dumps, padded with spaces."""
        return "<!-- %s -->" % self.data

    def toxml(self):
        """Return the comment serialised as markup, without padding."""
        return "<!--%s-->" % self.data

    def hilite(self):
        """Return HTML that displays the comment.

        The opening ``<`` is written as ``&lt;`` so a browser shows the
        comment as text instead of treating it as a real comment; the
        comment text is passed through ``escape``.
        """
        return '<code class="markup comment">&lt;!--%s--></code>' % escape(self.data)

    def cloneNode(self):
        """Return a new comment with the same data."""
        return CommentNode(self.data)
|
||
|
|
||
|
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that uses the node classes defined in this module.

    The class attributes name the node class for each kind of node;
    presumably they are read by ``_base.TreeBuilder`` (not visible here).
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = CommentNode
    fragmentClass = DocumentFragment

    def testSerializer(self, node):
        """Return the ``printTree()`` dump of *node*."""
        dump = node.printTree()
        return dump
|