import _base
from html5lib.constants import voidElements, namespaces, prefixes
from xml.sax.saxutils import escape


# Really crappy basic implementation of a DOM-core like thing
class Node(_base.Node):
    """Base class for every node in the simple tree.

    Each node carries a ``name``, a ``parent`` link, an optional ``value``
    and an ordered ``childNodes`` list.  Subclasses override ``type``,
    ``toxml`` and ``cloneNode``.
    """
    type = -1

    def __init__(self, name):
        self.name = name
        self.parent = None
        self.value = None
        self.childNodes = []
        self._flags = []

    def __iter__(self):
        """Yield every descendant, each child followed by its own subtree."""
        for node in self.childNodes:
            yield node
            for item in node:
                yield item

    def __unicode__(self):
        return self.name

    def toxml(self):
        """Serialize the node; must be provided by subclasses."""
        raise NotImplementedError

    def printTree(self, indent=0):
        """Return a text dump of this node and its subtree.

        Every node starts a new line of the form ``|<indent spaces><node>``;
        children are indented two more spaces than their parent.
        """
        tree = '\n|%s%s' % (' ' * indent, unicode(self))
        for child in self.childNodes:
            tree += child.printTree(indent + 2)
        return tree

    def appendChild(self, node):
        """Append ``node`` as the last child.

        A TextNode appended right after another TextNode is merged into it
        instead of being stored as a separate child.
        """
        assert isinstance(node, Node)
        if (isinstance(node, TextNode) and self.childNodes and
                isinstance(self.childNodes[-1], TextNode)):
            self.childNodes[-1].value += node.value
        else:
            self.childNodes.append(node)
        # The parent link is set even when the text was merged.
        node.parent = self

    def insertText(self, data, insertBefore=None):
        """Insert the unicode string ``data`` as text.

        The text is appended when ``insertBefore`` is None, otherwise it is
        placed in front of the child node ``insertBefore``.
        """
        assert isinstance(data, unicode), "data %s is of type %s expected unicode"%(repr(data), type(data))
        if insertBefore is None:
            self.appendChild(TextNode(data))
        else:
            self.insertBefore(TextNode(data), insertBefore)

    def insertBefore(self, node, refNode):
        """Insert ``node`` immediately before the child ``refNode``.

        A TextNode landing directly after an existing TextNode is merged
        into that sibling.  Raises ValueError (from ``list.index``) when
        ``refNode`` is not a child of this node.
        """
        index = self.childNodes.index(refNode)
        if (isinstance(node, TextNode) and index > 0 and
                isinstance(self.childNodes[index - 1], TextNode)):
            self.childNodes[index - 1].value += node.value
        else:
            self.childNodes.insert(index, node)
        node.parent = self

    def removeChild(self, node):
        """Detach ``node`` from this node.

        Raises ValueError (from ``list.remove``) when ``node`` is not a
        child; in that case ``node.parent`` is left untouched.
        """
        # The former bare ``except: raise`` wrapper did nothing but re-raise,
        # so the call is made directly.
        self.childNodes.remove(node)
        node.parent = None

    def cloneNode(self):
        """Return a copy of this node; must be provided by subclasses."""
        raise NotImplementedError

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self.childNodes)

    def getNameTuple(self):
        """Return ``(namespace, name)``, defaulting to the HTML namespace."""
        if self.namespace is None:
            return namespaces["html"], self.name
        return self.namespace, self.name

    nameTuple = property(getNameTuple)


class Document(Node):
    """Root node of a parsed document."""
    type = 1

    def __init__(self):
        Node.__init__(self, None)

    def __str__(self):
        return "#document"

    def __unicode__(self):
        return str(self)

    def appendChild(self, child):
        Node.appendChild(self, child)

    def toxml(self, encoding="utf-8"):
        """Return the children's ``toxml`` output, encoded with ``encoding``.

        The default used to be the misspelt ``"utf=8"``, which is not a
        codec name and made the default call fail with LookupError.
        """
        result = ""
        for child in self.childNodes:
            result += child.toxml()
        return result.encode(encoding)

    def hilite(self, encoding="utf-8"):
        """Return the children's ``hilite`` output, encoded with ``encoding``."""
        # NOTE(review): in the reviewed copy the opening literal was split
        # across two lines and the closing literal was empty; a wrapper
        # element may have been lost here -- confirm against upstream.
        result = ""
        for child in self.childNodes:
            result += child.hilite()
        return result.encode(encoding) + ""

    def printTree(self):
        """Return the text dump of the whole document."""
        tree = unicode(self)
        for child in self.childNodes:
            tree += child.printTree(2)
        return tree

    def cloneNode(self):
        return Document()


class DocumentFragment(Document):
    """Root node used when parsing a fragment."""
    type = 2

    def __str__(self):
        return "#document-fragment"

    def __unicode__(self):
        return str(self)

    def cloneNode(self):
        return DocumentFragment()


class DocumentType(Node):
    """A doctype node with optional public and system identifiers."""
    type = 3

    def __init__(self, name, publicId, systemId):
        Node.__init__(self, name)
        self.publicId = publicId
        self.systemId = systemId

    def __unicode__(self):
        """Return the doctype as ``<!DOCTYPE name>`` or with both ids quoted.

        The format strings had lost their placeholders, so ``%`` raised
        TypeError on every call; they are restored to match ``hilite``.
        """
        if self.publicId or self.systemId:
            publicId = self.publicId or ""
            systemId = self.systemId or ""
            return u'<!DOCTYPE %s "%s" "%s">' % (
                self.name, publicId, systemId)
        return u"<!DOCTYPE %s>" % self.name

    toxml = __unicode__

    def hilite(self):
        # NOTE(review): this literal was split across lines in the reviewed
        # copy; highlighting markup around it may have been lost -- confirm.
        return '<!DOCTYPE %s>' % self.name

    def cloneNode(self):
        return DocumentType(self.name, self.publicId, self.systemId)
class TextNode(Node):
    """Leaf node holding character data in ``value``."""
    type = 4

    def __init__(self, value):
        # Text nodes have no name; only the value is meaningful.
        Node.__init__(self, None)
        self.value = value

    def __unicode__(self):
        """Return the text wrapped in double quotes."""
        return u"\"%s\"" % self.value

    def toxml(self):
        """Return the text with XML special characters escaped."""
        return escape(self.value)

    # Highlighted output of text is the same as its XML serialization.
    hilite = toxml

    def cloneNode(self):
        """Return a new TextNode carrying the same value."""
        return TextNode(self.value)
class Element(Node):
    """An element node with an optional namespace and an attribute dict."""
    type = 5

    def __init__(self, name, namespace=None):
        Node.__init__(self, name)
        self.namespace = namespace
        self.attributes = {}

    def __unicode__(self):
        """Return ``<name>``, or ``<prefix name>`` for a namespaced element."""
        if self.namespace is None:
            return u"<%s>" % self.name
        return u"<%s %s>" % (prefixes[self.namespace], self.name)

    def toxml(self):
        """Serialize the element and its subtree as XML.

        Attribute values are escaped, including double quotes, so they stay
        inside their quoted attribute.  An element without children is
        written in the self-closing ``<name/>`` form.
        """
        result = '<' + self.name
        for name, value in self.attributes.iteritems():
            # Quotes must become &quot;; mapping '"' to itself was a no-op.
            result += u' %s="%s"' % (name, escape(value, {'"': '&quot;'}))
        if self.childNodes:
            result += '>'
            for child in self.childNodes:
                result += child.toxml()
            # Closing tag needs its leading "</"; it was emitted as "name>".
            result += u'</%s>' % self.name
        else:
            result += u'/>'
        return result

    def hilite(self):
        """Return the element and its subtree in the ``hilite`` form.

        Void elements without children get no closing tag.
        """
        # NOTE(review): the literals in this method were split across lines
        # in the reviewed copy; highlighting markup between the fragments may
        # have been lost -- confirm against upstream.
        result = '<%s' % self.name
        for name, value in self.attributes.iteritems():
            result += ' %s="%s"' % (name, escape(value, {'"': '&quot;'}))
        if self.childNodes:
            result += ">"
            for child in self.childNodes:
                result += child.hilite()
        elif self.name in voidElements:
            return result + ">"
        return result + '</%s>' % self.name

    def printTree(self, indent=0):
        """Return a text dump of the element, its attributes and subtree.

        Attributes are listed in sorted order, one per line, indented two
        spaces deeper than the element; tuple attribute names are shown as
        their two parts separated by a space.  ``indent`` defaults to 0 to
        match ``Node.printTree``.
        """
        tree = '\n|%s%s' % (' ' * indent, unicode(self))
        indent += 2
        for name, value in sorted(self.attributes.iteritems()):
            if isinstance(name, tuple):
                name = "%s %s" % (name[0], name[1])
            tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
        for child in self.childNodes:
            tree += child.printTree(indent)
        return tree

    def cloneNode(self):
        """Return a childless copy with the same name, namespace and attributes."""
        # __init__ always sets ``namespace``, so no hasattr check is needed.
        newNode = Element(self.name, self.namespace)
        newNode.attributes.update(self.attributes)
        return newNode
class CommentNode(Node):
    """A comment node; the comment text is kept in ``data``."""
    type = 6

    def __init__(self, data):
        Node.__init__(self, None)
        self.data = data

    def __unicode__(self):
        """Return the comment as shown in the tree dump.

        The format string had lost its placeholder, so ``%`` raised
        TypeError on every call.
        """
        return "<!-- %s -->" % self.data

    def toxml(self):
        """Return the comment in markup form, matching ``hilite``."""
        return "<!--%s-->" % self.data

    def hilite(self):
        # NOTE(review): this literal was split across lines in the reviewed
        # copy; highlighting markup around it may have been lost -- confirm.
        return '<!--%s-->' % escape(self.data)

    def cloneNode(self):
        """Return a new CommentNode carrying the same data."""
        return CommentNode(self.data)
class TreeBuilder(_base.TreeBuilder):
    """Tree builder wired to the simple node classes of this module."""

    # Node classes the base builder is given for each kind of node.
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = CommentNode
    fragmentClass = DocumentFragment

    def testSerializer(self, node):
        """Return the ``printTree`` dump of ``node``."""
        dump = node.printTree()
        return dump