openmedialibrary_platform/Shared/lib/python3.4/site-packages/html5lib/treewalkers/dom.py

44 lines
1.4 KiB
Python
Raw Normal View History

2013-10-11 17:28:32 +00:00
from __future__ import absolute_import, division, unicode_literals
from xml.dom import Node
2018-12-15 00:08:54 +00:00
from . import base
2013-10-11 17:28:32 +00:00
2018-12-15 00:08:54 +00:00
class TreeWalker(base.NonRecursiveTreeWalker):
    """Tree walker over ``xml.dom``-style nodes.

    Supplies the node-inspection and navigation hooks (node details,
    first child, next sibling, parent) on top of
    ``base.NonRecursiveTreeWalker``.
    """

    def getNodeDetails(self, node):
        """Describe ``node`` as a tuple tagged with a ``base`` node kind.

        The first item of the returned tuple is one of the ``base``
        constants; the remaining items depend on the node type:

        * doctype: ``(base.DOCTYPE, name, publicId, systemId)``
        * text / CDATA section: ``(base.TEXT, nodeValue)``
        * element: ``(base.ELEMENT, namespaceURI, nodeName, attrs,
          hasChildNodes)`` where ``attrs`` maps ``(namespace, name)``
          pairs to attribute values
        * comment: ``(base.COMMENT, nodeValue)``
        * document / document fragment: ``(base.DOCUMENT,)``
        * anything else: ``(base.UNKNOWN, nodeType)``
        """
        kind = node.nodeType

        if kind == Node.DOCUMENT_TYPE_NODE:
            return base.DOCTYPE, node.name, node.publicId, node.systemId

        if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return base.TEXT, node.nodeValue

        if kind == Node.ELEMENT_NODE:
            attributes = {}
            # Snapshot the attribute names before resolving each one to
            # its attribute node.
            for attr_name in list(node.attributes.keys()):
                attr_node = node.getAttributeNode(attr_name)
                namespace = attr_node.namespaceURI
                if namespace:
                    key = (namespace, attr_node.localName)
                else:
                    # Attributes without a namespace are keyed under None
                    # using their full attribute name.
                    key = (None, attr_node.name)
                attributes[key] = attr_node.value
            return (base.ELEMENT, node.namespaceURI, node.nodeName,
                    attributes, node.hasChildNodes())

        if kind == Node.COMMENT_NODE:
            return base.COMMENT, node.nodeValue

        if kind in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (base.DOCUMENT,)

        # Any other node type is reported together with its raw type code.
        return base.UNKNOWN, kind

    def getFirstChild(self, node):
        """Return ``node.firstChild``."""
        return node.firstChild

    def getNextSibling(self, node):
        """Return ``node.nextSibling``."""
        return node.nextSibling

    def getParentNode(self, node):
        """Return ``node.parentNode``."""
        return node.parentNode