2013-10-11 17:28:32 +00:00
|
|
|
from __future__ import absolute_import, division, unicode_literals
|
|
|
|
|
2018-12-15 00:08:54 +00:00
|
|
|
from collections import OrderedDict
|
2013-10-11 17:28:32 +00:00
|
|
|
import re
|
|
|
|
|
2015-11-04 12:01:55 +00:00
|
|
|
from six import string_types
|
2013-10-11 17:28:32 +00:00
|
|
|
|
2018-12-15 00:08:54 +00:00
|
|
|
from . import base
|
|
|
|
from .._utils import moduleFactoryFactory
|
2013-10-11 17:28:32 +00:00
|
|
|
|
|
|
|
# Matches names written as "{namespace}localname": group 1 captures the text
# between the braces, group 2 captures everything after the closing brace.
tag_regexp = re.compile("{([^}]*)}(.*)")
|
|
|
|
|
|
|
|
|
|
|
|
def getETreeBuilder(ElementTreeImplementation):
    """Build the tree-walker names for one ElementTree implementation.

    :arg ElementTreeImplementation: object exposing the ElementTree API;
        the only attribute used directly here is its ``Comment`` factory.

    :returns: this function's ``locals()`` dict, i.e. the names
        ``ElementTreeImplementation``, ``ElementTree``,
        ``ElementTreeCommentType`` and ``TreeWalker``.  No other
        function-level names may be introduced without changing that dict.
    """
    ElementTree = ElementTreeImplementation
    # The tag value this implementation puts on comment nodes; compared
    # against node.tag in getNodeDetails to recognise comments.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class TreeWalker(base.NonRecursiveTreeWalker):  # pylint:disable=unused-variable
        """Given the particular ElementTree representation, this implementation,
        to avoid using recursion, returns "nodes" as tuples with the following
        content:

        1. The current element

        2. The index of the element relative to its parent

        3. A stack of ancestor elements

        4. A flag "text", "tail" or None to indicate if the current node is a
           text node; either the text or tail of the current element (1)

        The root is passed around as a bare object rather than a tuple, and
        the ancestor stack (3) is a single list shared and mutated in place
        by the navigation methods below.
        """

        def getNodeDetails(self, node):
            """Return the token-details tuple describing ``node``.

            The first item is one of ``base.TEXT``, ``base.DOCUMENT``,
            ``base.DOCTYPE``, ``base.COMMENT`` or ``base.ELEMENT``; the
            remaining items depend on that type.
            """
            if isinstance(node, tuple):  # It might be the root Element
                elt, _, _, flag = node
                if flag in ("text", "tail"):
                    # The flag doubles as the attribute name holding the text.
                    return base.TEXT, getattr(elt, flag)
                node = elt

            # An object without a tag is unwrapped through getroot().
            if not hasattr(node, "tag"):
                node = node.getroot()

            if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
                return (base.DOCUMENT,)

            if node.tag == "<!DOCTYPE>":
                return (base.DOCTYPE, node.text,
                        node.get("publicId"), node.get("systemId"))

            if node.tag == ElementTreeCommentType:
                return base.COMMENT, node.text

            assert isinstance(node.tag, string_types), type(node.tag)
            # This is assumed to be an ordinary element
            match = tag_regexp.match(node.tag)
            if match:
                namespace, tag = match.groups()
            else:
                namespace = None
                tag = node.tag

            # Attribute keys become (namespace, local name) pairs, in the
            # order the element yields them.
            attrs = OrderedDict()
            for name, value in node.attrib.items():
                match = tag_regexp.match(name)
                if match:
                    attrs[(match.group(1), match.group(2))] = value
                else:
                    attrs[(None, name)] = value

            # Last item is truthy when the element has children or text.
            return (base.ELEMENT, namespace, tag,
                    attrs, len(node) or node.text)

        def getFirstChild(self, node):
            """Return the first child of ``node`` as a node tuple, or None.

            The element's own text (if truthy) comes before its first child
            element.  Text and tail nodes have no children.
            """
            # NOTE(review): unlike getNodeDetails, a tag-less root is not
            # unwrapped via getroot() here -- confirm such roots never reach
            # this method.
            if isinstance(node, tuple):
                element, key, parents, flag = node
            else:
                element, key, parents, flag = node, None, [], None

            if flag in ("text", "tail"):
                return None
            if element.text:
                return element, key, parents, "text"
            if len(element):
                # Descending: the element becomes the innermost ancestor.
                parents.append(element)
                return element[0], 0, parents, None
            return None

        def getNextSibling(self, node):
            """Return the node following ``node`` at the same level, or None.

            A bare (non-tuple) root has no sibling.  After an element's text
            comes its first child element; after an element comes its tail
            (if truthy), then the next element of the same parent.
            """
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                if len(element):
                    parents.append(element)
                    return element[0], 0, parents, None
                return None

            if element.tail and flag != "tail":
                return element, key, parents, "tail"
            if key < len(parents[-1]) - 1:
                return parents[-1][key + 1], key + 1, parents, None
            return None

        def getParentNode(self, node):
            """Return the parent of ``node``, or None for a bare root.

            The parent is returned bare when it has no ancestors of its own,
            otherwise as a node tuple.  For anything other than an element's
            text node this pops the shared ancestor stack.
            """
            if not isinstance(node, tuple):
                return None
            element, key, parents, flag = node

            if flag == "text":
                # The text node's parent is the element carried in the tuple.
                if not parents:
                    return element
                return element, key, parents, None

            parent = parents.pop()
            if not parents:
                return parent
            # Materialise the grandparent's children once and reuse the list
            # for both the uniqueness check and the index lookup.
            siblings = list(parents[-1])
            assert siblings.count(parent) == 1
            return parent, siblings.index(parent), parents, None

    return locals()
|
|
|
|
|
|
|
|
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory.
# NOTE(review): presumably the wrapper builds (and caches) one module-like
# namespace per ElementTree implementation -- confirm against .._utils.
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|