mirror of
https://github.com/moparisthebest/SickRage
synced 2025-01-08 04:18:09 -05:00
566 lines
21 KiB
Cython
566 lines
21 KiB
Cython
# Configurable Element class lookup
|
|
|
|
################################################################################
|
|
# Custom Element classes
|
|
|
|
cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
                                          object LxmlElementBase ]:
    u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)

    The public Element class.  All custom Element classes must inherit
    from this one.  To create an Element, use the `Element()` factory.

    BIG FAT WARNING: Subclasses *must not* override __init__ or
    __new__ as it is absolutely undefined when these objects will be
    created or destroyed.  All persistent state of Elements must be
    stored in the underlying XML.  If you really need to initialize
    the object after creation, you can implement an ``_init(self)``
    method that will be called directly after object creation.

    Subclasses of this class can be instantiated to create a new
    Element.  By default, the tag name will be the class name and the
    namespace will be empty.  You can modify this with the following
    class attributes:

    * TAG - the tag name, possibly containing a namespace in Clark
      notation

    * NAMESPACE - the default namespace URI, unless provided as part
      of the TAG attribute.

    * HTML - flag if the class is an HTML tag, as opposed to an XML
      tag.  This only applies to un-namespaced tags and defaults to
      false (i.e. XML).

    * PARSER - the parser that provides the configuration for the
      newly created document.  Providing an HTML parser here will
      default to creating an HTML element.

    In user code, the latter three are commonly inherited in class
    hierarchies that implement a common namespace.
    """
    def __init__(self, *children, attrib=None, nsmap=None, **_extra):
        u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)

        Build a new element from the class attributes TAG, NAMESPACE,
        HTML and PARSER, then add the positional ``children``.

        Each child may be a string (added to the element text or to the
        tail of the preceding child), an Element instance (appended), or
        an ElementBase subclass (instantiated without arguments and
        appended).  Any other child type raises TypeError.
        """
        cdef bint is_html = 0
        cdef _BaseParser parser
        cdef _Element last_child
        # don't use normal attribute access as it might be overridden
        _getattr = object.__getattribute__
        # NAMESPACE is optional; a namespace in TAG overrides it below
        try:
            namespace = _utf8(_getattr(self, 'NAMESPACE'))
        except AttributeError:
            namespace = None
        try:
            ns, tag = _getNsTag(_getattr(self, 'TAG'))
            if ns is not None:
                namespace = ns
        except AttributeError:
            # no TAG attribute: use the last dotted part of the class name
            tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__'))
            if b'.' in tag:
                tag = tag.split(b'.')[-1]
        try:
            parser = _getattr(self, 'PARSER')
        except AttributeError:
            # no PARSER attribute: take the parser of the first Element child
            parser = None
            for child in children:
                if isinstance(child, _Element):
                    parser = (<_Element>child)._doc._parser
                    break
        if isinstance(parser, HTMLParser):
            is_html = 1
        # an explicit HTML flag overrides the parser based default, but
        # only for un-namespaced tags
        if namespace is None:
            try:
                is_html = _getattr(self, 'HTML')
            except AttributeError:
                pass
        _initNewElement(self, is_html, tag, namespace, parser,
                        attrib, nsmap, _extra)
        # last_child tracks the most recently appended child so that a
        # following string is attached as its tail instead of as our text
        last_child = None
        for child in children:
            if _isString(child):
                if last_child is None:
                    _setNodeText(self._c_node,
                                 (_collectText(self._c_node.children) or '') + child)
                else:
                    _setTailText(last_child._c_node,
                                 (_collectText(last_child._c_node.next) or '') + child)
            elif isinstance(child, _Element):
                last_child = child
                _appendChild(self, last_child)
            elif isinstance(child, type) and issubclass(child, ElementBase):
                last_child = child()
                _appendChild(self, last_child)
            else:
                raise TypeError, "Invalid child type: %r" % type(child)
|
|
|
|
cdef class CommentBase(_Comment):
    u"""Base class that every custom Comment class must inherit from.

    Use the ``Comment()`` factory to create an XML Comment instance.

    Subclasses *must not* override __init__ or __new__, because it is
    undefined when these objects get created or destroyed.  Keep all
    persistent state of a Comment in the underlying XML.  If an object
    really needs initialisation after creation, implement an
    ``_init(self)`` method; it is called after object creation.
    """
    def __init__(self, text):
        # same steps as the Comment() factory
        cdef xmlDoc* c_doc
        cdef _Document doc
        # a missing text becomes an empty comment
        text = b'' if text is None else _utf8(text)
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createComment(c_doc, _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        # attach the comment node to the new document, then bind this proxy
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        self._init()
|
|
|
|
cdef class PIBase(_ProcessingInstruction):
    u"""Base class that every custom Processing Instruction class must
    inherit from.

    Use the ``PI()`` factory to create an XML ProcessingInstruction
    instance.

    Subclasses *must not* override __init__ or __new__, because it is
    undefined when these objects get created or destroyed.  Keep all
    persistent state of a PI in the underlying XML.  If an object really
    needs initialisation after creation, implement an ``_init(self)``
    method; it is called after object creation.
    """
    def __init__(self, target, text=None):
        # same steps as the PI() factory
        cdef xmlDoc* c_doc
        cdef _Document doc
        target = _utf8(target)
        # a missing text becomes empty PI content
        text = b'' if text is None else _utf8(text)
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text))
        if self._c_node is NULL:
            raise MemoryError()
        # attach the PI node to the new document, then bind this proxy
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        self._init()
|
|
|
|
cdef class EntityBase(_Entity):
    u"""Base class that every custom Entity class must inherit from.

    Use the ``Entity()`` factory to create an XML Entity instance.

    Subclasses *must not* override __init__ or __new__, because it is
    undefined when these objects get created or destroyed.  Keep all
    persistent state of an Entity in the underlying XML.  If an object
    really needs initialisation after creation, implement an
    ``_init(self)`` method; it is called after object creation.
    """
    def __init__(self, name):
        cdef xmlDoc* c_doc
        cdef _Document doc
        cdef bint is_char_ref
        name_utf = _utf8(name)
        c_name = _xcstr(name_utf)
        # a leading '#' marks a character reference; anything else must be
        # a valid XML name
        is_char_ref = c_name[0] == c'#'
        if is_char_ref:
            if not _characterReferenceIsValid(c_name + 1):
                raise ValueError(u"Invalid character reference: '%s'" % name)
        else:
            if not _xmlNameIsValid(c_name):
                raise ValueError(u"Invalid entity reference: '%s'" % name)
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, None)
        self._c_node = _createEntity(c_doc, c_name)
        if self._c_node is NULL:
            raise MemoryError()
        # attach the entity node to the new document, then bind this proxy
        tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
        _registerProxy(self, doc, self._c_node)
        self._init()
|
|
|
|
|
|
cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
    u"""Check that ``cls`` is an acceptable proxy class for ``c_node``.

    The required base class depends on the node type: ElementBase for
    elements, CommentBase for comments, EntityBase for entity references
    and PIBase for processing instructions.

    Returns 0 on success.  Raises TypeError if ``cls`` is not a class or
    not a subclass of the required base class.
    """
    if c_node.type == tree.XML_ELEMENT_NODE:
        expected = ElementBase
    elif c_node.type == tree.XML_COMMENT_NODE:
        expected = CommentBase
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        expected = EntityBase
    elif c_node.type == tree.XML_PI_NODE:
        expected = PIBase
    else:
        assert 0, u"Unknown node type: %s" % c_node.type

    if not (isinstance(cls, type) and issubclass(cls, expected)):
        # Name the required base class itself.  type() of a class is only
        # its metaclass, which says nothing about what went wrong.  For
        # the result, show the class if it is one, else the type of the
        # offending object.
        raise TypeError(
            "result of class lookup must be subclass of %s, got %s"
            % (expected, cls if isinstance(cls, type) else type(cls)))
    return 0
|
|
|
|
|
|
################################################################################
|
|
# Element class lookup
|
|
|
|
# C signature of a class lookup function.  It is called with the lookup
# state object, the document and the C node, and returns a Python object
# (the lookup functions in this file return the class to use for the node).
ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
|
|
|
|
# class to store element class lookup functions
|
|
cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
                                       object LxmlElementClassLookup ]:
    u"""ElementClassLookup(self)

    Common base class of all Element class lookup schemes.
    """
    # C function that implements the lookup of this scheme
    cdef _element_class_lookup_function _lookup_function
    def __cinit__(self):
        # NULL means: use the default lookup
        self._lookup_function = NULL
|
|
|
|
cdef public class FallbackElementClassLookup(ElementClassLookup) \
         [ type LxmlFallbackElementClassLookupType,
           object LxmlFallbackElementClassLookup ]:
    u"""FallbackElementClassLookup(self, fallback=None)

    Base class of Element class lookups that can delegate to a fallback
    lookup.
    """
    # the fallback lookup object, readable from Python
    cdef readonly ElementClassLookup fallback
    # the C lookup function that is called for the fallback
    cdef _element_class_lookup_function _fallback_function
    def __cinit__(self):
        # without an explicit fallback, use the default lookup
        self._fallback_function = _lookupDefaultElementClass

    def __init__(self, ElementClassLookup fallback=None):
        if fallback is None:
            self._fallback_function = _lookupDefaultElementClass
        else:
            self._setFallback(fallback)

    cdef void _setFallback(self, ElementClassLookup lookup):
        u"""Install ``lookup`` as the fallback scheme of this lookup.
        """
        self.fallback = lookup
        if lookup._lookup_function is NULL:
            # the fallback has no lookup function of its own
            self._fallback_function = _lookupDefaultElementClass
        else:
            self._fallback_function = lookup._lookup_function

    def set_fallback(self, ElementClassLookup lookup not None):
        u"""set_fallback(self, lookup)

        Install ``lookup`` as the fallback scheme of this lookup.
        """
        self._setFallback(lookup)
|
|
|
|
cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
                                       _Document doc, xmlNode* c_node):
    u"""Call the fallback function of ``lookup`` and return its result.

    The fallback lookup object stored on ``lookup`` is passed on as the
    state argument.
    """
    fallback_state = lookup.fallback
    return lookup._fallback_function(fallback_state, doc, c_node)
|
|
|
|
|
|
################################################################################
|
|
# default lookup scheme
|
|
|
|
cdef class ElementDefaultClassLookup(ElementClassLookup):
    u"""ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
    Element class lookup scheme that always returns the same, default
    class for each node type.

    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
    accept the classes to use for the respective node types.  Each must
    be a subclass of ElementBase, CommentBase, PIBase or EntityBase
    respectively, otherwise TypeError is raised.
    """
    cdef readonly object element_class
    cdef readonly object comment_class
    cdef readonly object pi_class
    cdef readonly object entity_class
    def __cinit__(self):
        self._lookup_function = _lookupDefaultElementClass

    def __init__(self, element=None, comment=None, pi=None, entity=None):
        # Each argument is validated and stored in turn, in the order
        # element, comment, entity, pi.
        if element is None:
            element = _Element
        elif not issubclass(element, ElementBase):
            raise TypeError(u"element class must be subclass of ElementBase")
        self.element_class = element

        if comment is None:
            comment = _Comment
        elif not issubclass(comment, CommentBase):
            raise TypeError(u"comment class must be subclass of CommentBase")
        self.comment_class = comment

        if entity is None:
            entity = _Entity
        elif not issubclass(entity, EntityBase):
            raise TypeError(u"Entity class must be subclass of EntityBase")
        self.entity_class = entity

        # A missing PI class is stored as None.  The lookup function
        # special-cases that value.
        if pi is not None and not issubclass(pi, PIBase):
            raise TypeError(u"PI class must be subclass of PIBase")
        self.pi_class = pi
|
|
|
|
cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
    u"""Trivial class lookup function that always returns the default class.

    ``state`` is either an ElementDefaultClassLookup that carries the
    configured classes, or something else (normally None), in which case
    the built-in classes are returned.  ``_doc`` is not used.

    For processing instructions without a configured class, an
    ``xml-stylesheet`` PI whose content contains "text/xsl" or "text/xml"
    is mapped to _XSLTProcessingInstruction, all others to
    _ProcessingInstruction.
    """
    cdef ElementDefaultClassLookup lookup = None
    # This function is also installed as the fallback for lookups that
    # have no lookup function of their own.  The state passed in is then a
    # plain ElementClassLookup that lacks the *_class fields, so it must
    # not be cast blindly.  Only read the fields from a real
    # ElementDefaultClassLookup.
    if isinstance(state, ElementDefaultClassLookup):
        lookup = <ElementDefaultClassLookup>state

    if c_node.type == tree.XML_ELEMENT_NODE:
        if lookup is not None:
            return lookup.element_class
        else:
            return _Element
    elif c_node.type == tree.XML_COMMENT_NODE:
        if lookup is not None:
            return lookup.comment_class
        else:
            return _Comment
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        if lookup is not None:
            return lookup.entity_class
        else:
            return _Entity
    elif c_node.type == tree.XML_PI_NODE:
        if lookup is None or lookup.pi_class is None:
            # special case XSLT-PI
            if c_node.name is not NULL and c_node.content is not NULL:
                if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0:
                    if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \
                           tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL:
                        return _XSLTProcessingInstruction
            return _ProcessingInstruction
        else:
            return lookup.pi_class
    else:
        assert 0, u"Unknown node type: %s" % c_node.type
|
|
|
|
|
|
################################################################################
|
|
# attribute based lookup scheme
|
|
|
|
cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
    u"""AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
    Selects the Element class by looking up the value of one attribute
    of the Element in a class dictionary.

    Arguments:
      - attribute name - '{ns}name' style string
      - class mapping  - Python dict mapping attribute values to Element classes
      - fallback       - optional fallback lookup mechanism

    If the attribute is missing, the class mapping is checked for a None
    key.
    """
    cdef object _class_mapping
    # (namespace, name) of the attribute as returned by _getNsTag()
    cdef tuple _pytag
    # C string views of the two items of _pytag (NULL for no namespace)
    cdef const_xmlChar* _c_ns
    cdef const_xmlChar* _c_name
    def __cinit__(self):
        self._lookup_function = _attribute_class_lookup

    def __init__(self, attribute_name, class_mapping,
                 ElementClassLookup fallback=None):
        self._pytag = _getNsTag(attribute_name)
        ns_part, name_part = self._pytag
        if ns_part is not None:
            self._c_ns = _xcstr(ns_part)
        else:
            self._c_ns = NULL
        self._c_name = _xcstr(name_part)
        # work on a private copy of the mapping
        self._class_mapping = dict(class_mapping)

        FallbackElementClassLookup.__init__(self, fallback)
|
|
|
|
cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
    u"""Lookup function of AttributeBasedElementClassLookup.

    For element nodes, reads the configured attribute and looks its value
    up in the class mapping.  A class that is found is validated against
    the node type and returned.  Non-element nodes and unmapped values
    are passed on to the fallback.
    """
    cdef AttributeBasedElementClassLookup lookup
    cdef python.PyObject* mapped

    lookup = <AttributeBasedElementClassLookup>state
    if c_node.type != tree.XML_ELEMENT_NODE:
        return _callLookupFallback(lookup, doc, c_node)

    attr_value = _attributeValueFromNsName(
        c_node, lookup._c_ns, lookup._c_name)
    mapped = python.PyDict_GetItem(lookup._class_mapping, attr_value)
    if mapped is NULL:
        # no class registered for this value
        return _callLookupFallback(lookup, doc, c_node)

    cls = <object>mapped
    _validateNodeClass(c_node, cls)
    return cls
|
|
|
|
|
|
################################################################################
|
|
# per-parser lookup scheme
|
|
|
|
cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
    u"""ParserBasedElementClassLookup(self, fallback=None)
    Element class lookup that delegates to the lookup of the XML parser.
    """
    def __cinit__(self):
        # install the parser delegating lookup function
        self._lookup_function = _parser_class_lookup
|
|
|
|
cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
    u"""Lookup function of ParserBasedElementClassLookup.

    Delegates to the class lookup configured on the parser of ``doc``.
    If the parser has no lookup, or its lookup has no lookup function,
    the fallback of ``state`` is used instead.
    """
    # A plain ElementClassLookup keeps _lookup_function at NULL ("use
    # default lookup").  Calling through it would be a NULL function call,
    # so treat it like a missing lookup, as the other call sites do.
    if doc._parser._class_lookup is not None and \
            doc._parser._class_lookup._lookup_function is not NULL:
        return doc._parser._class_lookup._lookup_function(
            doc._parser._class_lookup, doc, c_node)
    return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node)
|
|
|
|
|
|
################################################################################
|
|
# custom class lookup based on node type, namespace, name
|
|
|
|
cdef class CustomElementClassLookup(FallbackElementClassLookup):
    u"""CustomElementClassLookup(self, fallback=None)
    Element class lookup that is implemented by a subclass method.

    Inherit from this class and override the method::

        lookup(self, type, doc, namespace, name)

    to select the element class for a node.  The method receives:

      * type:      one of 'element', 'comment', 'PI', 'entity'
      * doc:       document that the node is in
      * namespace: namespace URI of the node (or None for comments/PIs/entities)
      * name:      name of the element/entity, None for comments, target for PIs

    Returning None from the method hands the decision to the fallback.
    """
    def __cinit__(self):
        self._lookup_function = _custom_class_lookup

    def lookup(self, type, doc, namespace, name):
        u"lookup(self, type, doc, namespace, name)"
        # default implementation: always defer to the fallback
        return None
|
|
|
|
cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
    u"""Lookup function of CustomElementClassLookup.

    Translates the C node into (type name, namespace, name) and passes
    these to the ``lookup()`` method of the lookup object.  A class that
    is returned is validated against the node type; a None result is
    handed to the fallback.
    """
    cdef CustomElementClassLookup lookup

    lookup = <CustomElementClassLookup>state

    if c_node.type == tree.XML_COMMENT_NODE:
        element_type = u"comment"
    elif c_node.type == tree.XML_PI_NODE:
        element_type = u"PI"
    elif c_node.type == tree.XML_ENTITY_REF_NODE:
        element_type = u"entity"
    else:
        # element nodes, and any node type not listed above
        element_type = u"element"

    name = None if c_node.name is NULL else funicode(c_node.name)

    c_href = tree._getNs(c_node)
    if c_href is not NULL:
        ns = funicode(c_href)
    else:
        ns = None

    cls = lookup.lookup(element_type, doc, ns, name)
    if cls is None:
        return _callLookupFallback(lookup, doc, c_node)
    _validateNodeClass(c_node, cls)
    return cls
|
|
|
|
|
|
################################################################################
|
|
# read-only tree based class lookup
|
|
|
|
cdef class PythonElementClassLookup(FallbackElementClassLookup):
    u"""PythonElementClassLookup(self, fallback=None)
    Element class lookup that is implemented by a subclass method.

    This lookup scheme gives read-only access to the entire XML tree.
    To use it, re-implement the ``lookup(self, doc, root)`` method in a
    subclass::

        from lxml import etree, pyclasslookup

        class MyElementClass(etree.ElementBase):
            honkey = True

        class MyLookup(pyclasslookup.PythonElementClassLookup):
            def lookup(self, doc, root):
                if root.tag == "sometag":
                    return MyElementClass
                else:
                    for child in root:
                        if child.tag == "someothertag":
                            return MyElementClass
                # delegate to default
                return None

    Returning None from the method hands the decision to the fallback.

    The first argument is the opaque document instance that contains
    the Element.  The second argument is a lightweight Element proxy
    implementation that is only valid during the lookup.  Do not try
    to keep a reference to it: once the lookup is done, the proxy
    becomes invalid.

    Also, such a read-only Element cannot be wrapped in an ElementTree,
    and no reference to it may be kept outside of the `lookup()` method.

    Note that the API of these Element objects is not complete.  It is
    purely read-only and does not support all features of the normal
    `lxml.etree` API (such as XPath, extended slicing or some
    iteration methods).

    See https://lxml.de/element_classes.html
    """
    def __cinit__(self):
        self._lookup_function = _python_class_lookup

    def lookup(self, doc, element):
        u"""lookup(self, doc, element)

        Override this method to implement your own lookup scheme.
        """
        # default implementation: always defer to the fallback
        return None
|
|
|
|
cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node):
    u"""Lookup function of PythonElementClassLookup.

    Wraps ``c_node`` in a read-only proxy and passes it to the
    ``lookup()`` method of the lookup object.  A class that is returned
    is validated against the node type; a None result is handed to the
    fallback.  Exceptions raised by ``lookup()`` propagate to the caller.
    """
    cdef PythonElementClassLookup lookup
    cdef _ReadOnlyElementProxy proxy
    lookup = <PythonElementClassLookup>state

    proxy = _newReadOnlyProxy(None, c_node)
    try:
        cls = lookup.lookup(doc, proxy)
    finally:
        # The proxy is only meant to be usable during the lookup.  Release
        # it even if the user's lookup() raises, so that a proxy kept
        # alive by the exception or by user code is not left pointing at
        # the C node.
        _freeReadOnlyProxies(proxy)

    if cls is not None:
        _validateNodeClass(c_node, cls)
        return cls
    return _callLookupFallback(lookup, doc, c_node)
|
|
|
|
################################################################################
|
|
# Global setup
|
|
|
|
# Globally active lookup function and the state object that is stored
# together with it.  Both are assigned by _setElementClassLookupFunction().
cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS
cdef object ELEMENT_CLASS_LOOKUP_STATE
|
|
|
|
cdef void _setElementClassLookupFunction(
    _element_class_lookup_function function, object state):
    u"""Install ``function`` and ``state`` as the global class lookup.

    A NULL ``function`` selects DEFAULT_ELEMENT_CLASS_LOOKUP and its
    lookup function instead; ``state`` is ignored in that case.
    """
    global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE
    if function is not NULL:
        ELEMENT_CLASS_LOOKUP_STATE = state
        LOOKUP_ELEMENT_CLASS = function
    else:
        ELEMENT_CLASS_LOOKUP_STATE = DEFAULT_ELEMENT_CLASS_LOOKUP
        LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function
|
|
|
|
def set_element_class_lookup(ElementClassLookup lookup = None):
    u"""set_element_class_lookup(lookup = None)

    Set the global default element class lookup method.

    Passing None, or a lookup without a lookup function, restores the
    default lookup.
    """
    if lookup is not None and lookup._lookup_function is not NULL:
        _setElementClassLookupFunction(lookup._lookup_function, lookup)
    else:
        _setElementClassLookupFunction(NULL, None)
|
|
|
|
# default setup: parser delegation
|
|
# The default lookup is a ParserBasedElementClassLookup created without an
# explicit fallback.
cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()

# install the default lookup as the global lookup at module setup
set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)
|