2014-03-10 01:18:05 -04:00
|
|
|
"""A collection of modules for iterating through different kinds of
|
|
|
|
tree, generating tokens identical to those produced by the tokenizer
|
|
|
|
module.
|
|
|
|
|
|
|
|
To create a tree walker for a new type of tree, you need to
|
|
|
|
implement a tree walker object (called TreeWalker by convention) that
|
|
|
|
implements a 'serialize' method taking a tree as sole argument and
|
|
|
|
returning an iterator generating tokens.
|
|
|
|
"""
|
|
|
|
|
2014-07-21 19:01:46 -04:00
|
|
|
from __future__ import absolute_import, division, unicode_literals
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
from ..utils import default_etree
|
|
|
|
|
2014-03-10 01:18:05 -04:00
|
|
|
# Cache of TreeWalker classes keyed by lower-cased tree type name. It is
# filled lazily by getTreeWalker; walkers for the "etree" tree type are not
# stored here.
treeWalkerCache = {}
|
|
|
|
|
2014-07-21 19:01:46 -04:00
|
|
|
|
2014-03-10 01:18:05 -04:00
|
|
|
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Return the TreeWalker class that handles the requested kind of tree.

    treeType - name of the tree type (matched case-insensitively). The
               built-in choices are:

                "dom" - The xml.dom.minidom DOM implementation
                "pulldom" - The xml.dom.pulldom event stream
                "etree" - A generic walker for tree implementations exposing
                          an elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "lxml" - Optimized walker for lxml.etree
                "genshi" - a Genshi stream

    implementation - (only consulted for the "etree" tree type) a module
                     implementing the tree type, e.g. xml.etree.ElementTree
                     or cElementTree. When omitted, default_etree is used.

    Any extra keyword arguments are forwarded to etree.getETreeModule, and
    therefore only matter for the "etree" tree type.

    Returns the TreeWalker class, or None when the tree type is not one of
    the names listed above.
    """
    key = treeType.lower()

    # Already resolved on an earlier call: hand back the cached class.
    if key in treeWalkerCache:
        return treeWalkerCache[key]

    if key == "etree":
        from . import etree
        etree_impl = default_etree if implementation is None else implementation
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(etree_impl, **kwargs).TreeWalker

    if key in ("dom", "pulldom"):
        # The submodule shares its name with the tree type, so import it by
        # its dotted name and fetch the module object from sys.modules.
        qualified = "%s.%s" % (__name__, key)
        __import__(qualified)
        walker = sys.modules[qualified].TreeWalker
    elif key == "genshi":
        from . import genshistream
        walker = genshistream.TreeWalker
    elif key == "lxml":
        from . import lxmletree
        walker = lxmletree.TreeWalker
    else:
        # Unknown tree type: nothing to cache, nothing to return.
        return None

    treeWalkerCache[key] = walker
    return walker
|