mirror of
https://github.com/moparisthebest/SickRage
synced 2024-12-14 20:12:19 -05:00
53 lines
2.4 KiB
Python
53 lines
2.4 KiB
Python
|
"""A collection of modules for iterating through different kinds of
|
||
|
tree, generating tokens identical to those produced by the tokenizer
|
||
|
module.
|
||
|
|
||
|
To create a tree walker for a new type of tree, you need to
|
||
|
implement a tree walker object (called TreeWalker by convention) that
|
||
|
implements a 'serialize' method taking a tree as sole argument and
|
||
|
returning an iterator generating tokens.
|
||
|
"""
|
||
|
|
||
|
# Cache of TreeWalker classes that have already been imported, keyed by the
# lower-cased tree type name.  "etree" is deliberately never stored here.
treeWalkerCache = {}

# Tree type name -> name of the module that provides its TreeWalker class.
# NOTE(review): these modules are imported by bare name with this module's
# globals, exactly as the original code did; presumably they are sibling
# modules of this package -- confirm before changing the import style.
_treeWalkerModules = {
    "dom": "dom",
    "pulldom": "pulldom",
    "simpletree": "simpletree",
    "genshi": "genshistream",
    "beautifulsoup": "soup",
    "lxml": "lxmletree",
}


def getTreeWalker(treeType, implementation=None, **kwargs):
    """Get a TreeWalker class for various types of tree with built-in support

    treeType - the name of the tree type required (case-insensitive). Supported
               values are "simpletree", "dom", "pulldom", "etree", "lxml",
               "beautifulsoup" and "genshi"

               "simpletree" - a built-in DOM-ish tree type with support for some
                              more pythonic idioms.
               "dom" - The xml.dom.minidom DOM implementation
               "pulldom" - The xml.dom.pulldom event stream
               "etree" - A generic walker for tree implementations exposing an
                         elementtree-like interface (known to work with
                         ElementTree, cElementTree and lxml.etree).
               "lxml" - Optimized walker for lxml.etree
               "beautifulsoup" - Beautiful soup (if installed)
               "genshi" - a Genshi stream

    implementation - (Currently applies to the "etree" tree type only). A module
                     implementing the tree type e.g. xml.etree.ElementTree or
                     cElementTree.

    Any extra keyword arguments are forwarded to etree.getETreeModule and are
    only used for the "etree" tree type.

    Returns the TreeWalker class for the requested tree type, or None when the
    tree type is not recognised.  Whatever the underlying import raises (for
    example when an optional dependency is missing) propagates to the caller.
    """
    treeType = treeType.lower()

    # Anything already resolved is served straight from the cache.
    if treeType in treeWalkerCache:
        return treeWalkerCache[treeType]

    if treeType == "etree":
        import etree
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeWalker

    moduleName = _treeWalkerModules.get(treeType)
    if moduleName is None:
        # Unknown tree type: keep the historical behaviour of returning None
        # instead of raising.
        return None

    # Import lazily so that optional dependencies are only required when the
    # corresponding tree type is actually requested.
    walkerModule = __import__(moduleName, globals())
    treeWalkerCache[treeType] = walkerModule.TreeWalker
    return treeWalkerCache[treeType]
|