from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE try: from types import ModuleType except: from new import module as ModuleType import re import weakref import _base from html5lib import constants, ihatexml from html5lib.constants import namespaces moduleCache = {} def getDomModule(DomImplementation): name = "_" + DomImplementation.__name__+"builder" if name in moduleCache: return moduleCache[name] else: mod = ModuleType(name) objs = getDomBuilder(DomImplementation) mod.__dict__.update(objs) moduleCache[name] = mod return mod def getDomBuilder(DomImplementation): Dom = DomImplementation class AttrList(object): def __init__(self, element): self.element = element def __iter__(self): return self.element.attributes.items().__iter__() def __setitem__(self, name, value): self.element.setAttribute(name, value) def __len__(self): return len(self.element.attributes.items()) def items(self): return [(item[0], item[1]) for item in self.element.attributes.items()] def keys(self): return self.element.attributes.keys() def __getitem__(self, name): return self.element.getAttribute(name) def __contains__(self, name): if isinstance(name, tuple): raise NotImplementedError else: return self.element.hasAttribute(name) class NodeBuilder(_base.Node): def __init__(self, element): _base.Node.__init__(self, element.nodeName) self.element = element namespace = property(lambda self:hasattr(self.element, "namespaceURI") and self.element.namespaceURI or None) def appendChild(self, node): node.parent = self self.element.appendChild(node.element) def insertText(self, data, insertBefore=None): text = self.element.ownerDocument.createTextNode(data) if insertBefore: self.element.insertBefore(text, insertBefore.element) else: self.element.appendChild(text) def insertBefore(self, node, refNode): self.element.insertBefore(node.element, refNode.element) node.parent = self def removeChild(self, node): if node.element.parentNode == self.element: self.element.removeChild(node.element) node.parent = None def reparentChildren(self, newParent): while self.element.hasChildNodes(): child = self.element.firstChild self.element.removeChild(child) newParent.element.appendChild(child) self.childNodes = [] def getAttributes(self): return AttrList(self.element) def setAttributes(self, attributes): if attributes: for name, value in attributes.items(): if isinstance(name, tuple): if name[0] is not None: qualifiedName = (name[0] + ":" + name[1]) else: qualifiedName = name[1] self.element.setAttributeNS(name[2], qualifiedName, value) else: self.element.setAttribute( name, value) attributes = property(getAttributes, setAttributes) def cloneNode(self): return NodeBuilder(self.element.cloneNode(False)) def hasContent(self): return self.element.hasChildNodes() def getNameTuple(self): if self.namespace == None: return namespaces["html"], self.name else: return self.namespace, self.name nameTuple = property(getNameTuple) class TreeBuilder(_base.TreeBuilder): def documentClass(self): self.dom = Dom.getDOMImplementation().createDocument(None,None,None) return weakref.proxy(self) def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] domimpl = Dom.getDOMImplementation() doctype = domimpl.createDocumentType(name, publicId, systemId) self.document.appendChild(NodeBuilder(doctype)) if Dom == minidom: doctype.ownerDocument = self.dom def elementClass(self, name, namespace=None): if namespace is None and self.defaultNamespace is None: node = self.dom.createElement(name) else: node = self.dom.createElementNS(namespace, name) return NodeBuilder(node) def commentClass(self, data): return NodeBuilder(self.dom.createComment(data)) def fragmentClass(self): return NodeBuilder(self.dom.createDocumentFragment()) def appendChild(self, node): self.dom.appendChild(node.element) def testSerializer(self, element): return testSerializer(element) def getDocument(self): return self.dom def getFragment(self): return _base.TreeBuilder.getFragment(self).element def insertText(self, data, parent=None): data=data if parent <> self: _base.TreeBuilder.insertText(self, data, parent) else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, '_child_node_types'): if not Node.TEXT_NODE in self.dom._child_node_types: self.dom._child_node_types=list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) name = None def testSerializer(element): element.normalize() rv = [] def serializeElement(element, indent=0): if element.nodeType == Node.DOCUMENT_TYPE_NODE: if element.name: if element.publicId or element.systemId: publicId = element.publicId or "" systemId = element.systemId or "" rv.append( """|%s"""%( ' '*indent, element.name, publicId, systemId)) else: rv.append("|%s"%(' '*indent, element.name)) else: rv.append("|%s"%(' '*indent,)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: rv.append("|%s"%(' '*indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI != None): name = "%s %s"%(constants.prefixes[element.namespaceURI], element.nodeName) else: name = element.nodeName rv.append("|%s<%s>"%(' '*indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): attr = element.attributes.item(i) name = attr.nodeName value = attr.value ns = attr.namespaceURI if ns: name = "%s %s"%(constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' '*(indent+2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) serializeElement(element, 0) return "\n".join(rv) def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}): if node.nodeType == Node.ELEMENT_NODE: if not nsmap: handler.startElement(node.nodeName, node.attributes) for child in node.childNodes: dom2sax(child, handler, nsmap) handler.endElement(node.nodeName) else: attributes = dict(node.attributes.itemsNS()) # gather namespace declarations prefixes = [] for attrname in node.attributes.keys(): attr = node.getAttributeNode(attrname) if (attr.namespaceURI == XMLNS_NAMESPACE or (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))): prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None) handler.startPrefixMapping(prefix, attr.nodeValue) prefixes.append(prefix) nsmap = nsmap.copy() nsmap[prefix] = attr.nodeValue del attributes[(attr.namespaceURI, attr.nodeName)] # apply namespace declarations for attrname in node.attributes.keys(): attr = node.getAttributeNode(attrname) if attr.namespaceURI == None and ':' in attr.nodeName: prefix = attr.nodeName.split(':')[0] if nsmap.has_key(prefix): del attributes[(attr.namespaceURI, attr.nodeName)] attributes[(nsmap[prefix],attr.nodeName)]=attr.nodeValue # SAX events ns = node.namespaceURI or nsmap.get(None,None) handler.startElementNS((ns,node.nodeName), node.nodeName, attributes) for child in node.childNodes: dom2sax(child, handler, nsmap) handler.endElementNS((ns, node.nodeName), node.nodeName) for prefix in prefixes: handler.endPrefixMapping(prefix) elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]: handler.characters(node.nodeValue) elif node.nodeType == Node.DOCUMENT_NODE: handler.startDocument() for child in node.childNodes: dom2sax(child, handler, nsmap) handler.endDocument() elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: for child in node.childNodes: dom2sax(child, handler, nsmap) else: # ATTRIBUTE_NODE # ENTITY_NODE # PROCESSING_INSTRUCTION_NODE # COMMENT_NODE # DOCUMENT_TYPE_NODE # NOTATION_NODE pass return locals() # Keep backwards compatibility with things that directly load # classes/functions from this module for key, value in getDomModule(minidom).__dict__.items(): globals()[key] = value