update Shared

This commit is contained in:
j 2015-11-04 13:01:55 +01:00
commit 6881f3471a
184 changed files with 13080 additions and 13691 deletions

View file

@ -10,8 +10,12 @@ returning an iterator generating tokens.
from __future__ import absolute_import, division, unicode_literals
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
"pulldom"]
import sys
from .. import constants
from ..utils import default_etree
treeWalkerCache = {}
@ -55,3 +59,89 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
# XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType)
def concatenateCharacterTokens(tokens):
pendingCharacters = []
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
pendingCharacters.append(token["data"])
else:
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
pendingCharacters = []
yield token
if pendingCharacters:
yield {"type": "Characters", "data": "".join(pendingCharacters)}
def pprint(walker):
"""Pretty printer for tree walkers"""
output = []
indent = 0
for token in concatenateCharacterTokens(walker):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
# tag name
if token["namespace"] and token["namespace"] != constants.namespaces["html"]:
if token["namespace"] in constants.prefixes:
ns = constants.prefixes[token["namespace"]]
else:
ns = token["namespace"]
name = "%s %s" % (ns, token["name"])
else:
name = token["name"]
output.append("%s<%s>" % (" " * indent, name))
indent += 2
# attributes (sorted for consistent ordering)
attrs = token["data"]
for (namespace, localname), value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
ns = constants.prefixes[namespace]
else:
ns = namespace
name = "%s %s" % (ns, localname)
else:
name = localname
output.append("%s%s=\"%s\"" % (" " * indent, name, value))
# self-closing
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
(" " * indent,
token["name"],
token["publicId"],
token["systemId"] if token["systemId"] else ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">""" %
(" " * indent,
token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>" % (" " * indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" " * indent,))
elif type == "Characters":
output.append("%s\"%s\"" % (" " * indent, token["data"]))
elif type == "SpaceCharacters":
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
else:
raise ValueError("Unknown token type, %s" % type)
return "\n".join(output)

View file

@ -1,8 +1,8 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type, string_types
import gettext
_ = gettext.gettext
__all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN",
"TreeWalker", "NonRecursiveTreeWalker"]
from xml.dom import Node
@ -58,7 +58,7 @@ class TreeWalker(object):
"namespace": to_text(namespace),
"data": attrs}
if hasChildren:
yield self.error(_("Void element has children"))
yield self.error("Void element has children")
def startTag(self, namespace, name, attrs):
assert namespace is None or isinstance(namespace, string_types), type(namespace)
@ -122,7 +122,7 @@ class TreeWalker(object):
return {"type": "Entity", "name": text_type(name)}
def unknown(self, nodeType):
return self.error(_("Unknown node type: ") + nodeType)
return self.error("Unknown node type: " + nodeType)
class NonRecursiveTreeWalker(TreeWalker):

View file

@ -2,9 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
from xml.dom import Node
import gettext
_ = gettext.gettext
from . import _base

View file

@ -7,12 +7,10 @@ except ImportError:
from ordereddict import OrderedDict
except ImportError:
OrderedDict = dict
import gettext
_ = gettext.gettext
import re
from six import text_type
from six import string_types
from . import _base
from ..utils import moduleFactoryFactory
@ -60,7 +58,7 @@ def getETreeBuilder(ElementTreeImplementation):
return _base.COMMENT, node.text
else:
assert type(node.tag) == text_type, type(node.tag)
assert isinstance(node.tag, string_types), type(node.tag)
# This is assumed to be an ordinary element
match = tag_regexp.match(node.tag)
if match:

View file

@ -4,9 +4,6 @@ from six import text_type
from lxml import etree
from ..treebuilders.etree import tag_regexp
from gettext import gettext
_ = gettext
from . import _base
from .. import ihatexml
@ -130,7 +127,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNodeDetails(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
return _base.TEXT, ensure_str(getattr(node, key))
elif isinstance(node, Root):
@ -169,7 +166,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
attrs, len(node) > 0 or node.text)
def getFirstChild(self, node):
assert not isinstance(node, tuple), _("Text nodes have no children")
assert not isinstance(node, tuple), "Text nodes have no children"
assert len(node) or node.text, "Node has no children"
if node.text:
@ -180,7 +177,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getNextSibling(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text":
# XXX: we cannot use a "bool(node) and node[0] or None" construct here
# because node[0] might evaluate to False if it has no child element
@ -196,7 +193,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
def getParentNode(self, node):
if isinstance(node, tuple): # Text node
node, key = node
assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key
if key == "text":
return node
# else: fallback to "normal" processing