split platform
This commit is contained in:
commit
8c9b09577d
2261 changed files with 676163 additions and 0 deletions
223
lib/python3.5/site-packages/lxml/ElementInclude.py
Normal file
223
lib/python3.5/site-packages/lxml/ElementInclude.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
#
|
||||
# ElementTree
|
||||
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
|
||||
#
|
||||
# limited xinclude support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-08-15 fl created
|
||||
# 2003-11-14 fl fixed default loader
|
||||
#
|
||||
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
Limited XInclude support for the ElementTree package.
|
||||
|
||||
While lxml.etree has full support for XInclude (see
|
||||
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
|
||||
Python, ElementTree compatible implementation that supports a simple
|
||||
form of custom URL resolvers.
|
||||
"""
|
||||
|
||||
from lxml import etree
|
||||
import copy
|
||||
try:
|
||||
from urlparse import urljoin
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from urllib.parse import urljoin
|
||||
from urllib.request import urlopen
|
||||
|
||||
try:
|
||||
set
|
||||
except NameError:
|
||||
# Python 2.3
|
||||
from sets import Set as set
|
||||
|
||||
XINCLUDE = "{http://www.w3.org/2001/XInclude}"
|
||||
|
||||
XINCLUDE_INCLUDE = XINCLUDE + "include"
|
||||
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
|
||||
|
||||
##
|
||||
# Fatal include error.
|
||||
|
||||
class FatalIncludeError(etree.LxmlSyntaxError):
    """Fatal include error, raised when an XInclude directive cannot be
    processed."""
|
||||
|
||||
##
|
||||
# ET compatible default loader.
|
||||
# This loader reads an included resource from disk.
|
||||
#
|
||||
# @param href Resource reference.
|
||||
# @param parse Parse mode. Either "xml" or "text".
|
||||
# @param encoding Optional text encoding.
|
||||
# @return The expanded resource. If the parse mode is "xml", this
|
||||
# is an ElementTree instance. If the parse mode is "text", this
|
||||
# is a Unicode string. If the loader fails, it can return None
|
||||
# or raise an IOError exception.
|
||||
# @throws IOError If the loader fails to load the resource.
|
||||
|
||||
def default_loader(href, parse, encoding=None):
    """Load an included resource from the local file system.

    ElementTree compatible loader: ``href`` is opened as a local file in
    binary mode.

    :param href: path of the resource to read.
    :param parse: parse mode; ``"xml"`` parses the file, any other value
        reads it as text.
    :param encoding: text encoding used in text mode; ``'utf-8'`` is used
        when it is empty or ``None``.  Not used in ``"xml"`` mode.
    :return: the root element of the parsed document in ``"xml"`` mode,
        otherwise the decoded content of the file.
    :raises IOError: if the file cannot be opened or read.
    """
    # The context manager releases the handle even if parsing, reading or
    # decoding raises (the previous explicit close() was skipped then).
    with open(href, 'rb') as source:
        if parse == "xml":
            return etree.parse(source).getroot()
        return source.read().decode(encoding or 'utf-8')
|
||||
|
||||
##
|
||||
# Default loader used by lxml.etree - handles custom resolvers properly
|
||||
#
|
||||
|
||||
def _lxml_default_loader(href, parse, encoding=None, parser=None):
|
||||
if parse == "xml":
|
||||
data = etree.parse(href, parser).getroot()
|
||||
else:
|
||||
if "://" in href:
|
||||
f = urlopen(href)
|
||||
else:
|
||||
f = open(href, 'rb')
|
||||
data = f.read()
|
||||
f.close()
|
||||
if not encoding:
|
||||
encoding = 'utf-8'
|
||||
data = data.decode(encoding)
|
||||
return data
|
||||
|
||||
##
|
||||
# Wrapper for ET compatibility - drops the parser
|
||||
|
||||
def _wrap_et_loader(loader):
|
||||
def load(href, parse, encoding=None, parser=None):
|
||||
return loader(href, parse, encoding)
|
||||
return load
|
||||
|
||||
|
||||
##
|
||||
# Expand XInclude directives.
|
||||
#
|
||||
# @param elem Root element.
|
||||
# @param loader Optional resource loader. If omitted, it defaults
|
||||
# to the internal <b>_lxml_default_loader</b>. If given, it should be a callable
|
||||
# that implements the same interface as <b>default_loader</b>.
|
||||
# @throws FatalIncludeError If the function fails to include a given
|
||||
# resource, or if the tree contains malformed XInclude elements.
|
||||
# @throws IOError If the function fails to load a given resource.
|
||||
# @returns the node or its replacement if it was an XInclude node
|
||||
|
||||
def include(elem, loader=None, base_url=None):
    """Expand the XInclude directives in a tree or below an element.

    :param elem: an element, or any object with a ``getroot()`` method
        (in which case its root element is processed).
    :param loader: optional resource loader, called as
        ``loader(href, parse, encoding)``.  When omitted, the module's
        internal default loader is used.
    :param base_url: URL used to resolve relative ``href`` values.  When
        ``None``, ``docinfo.URL`` of the tree is used if the tree has a
        ``docinfo`` attribute.
    :return: the result of the expansion: the processed element, or its
        replacement if the element itself was an XInclude directive.
        (Previously the result was computed but dropped.)
    :raises FatalIncludeError: if a resource cannot be included or the
        tree contains malformed XInclude elements.
    """
    if hasattr(elem, 'getroot'):
        # a tree was passed in: work on its root element
        tree = elem
        elem = elem.getroot()
    elif base_url is None:
        tree = elem.getroottree()
    else:
        # the owning tree is only needed to look up a default base URL
        tree = None

    if base_url is None and hasattr(tree, 'docinfo'):
        base_url = tree.docinfo.URL

    return _include(elem, loader, base_url=base_url)
|
||||
|
||||
def _include(elem, loader=None, _parent_hrefs=None, base_url=None):
    """Recursively expand the XInclude elements at or below ``elem``.

    :param elem: element whose subtree is processed in place.
    :param loader: optional ElementTree style loader
        ``loader(href, parse, encoding)``; the internal default loader is
        used when it is ``None``.
    :param _parent_hrefs: set of URLs of the documents currently being
        included (the chain of ancestors), used to detect inclusion loops.
    :param base_url: URL against which ``href`` attributes are resolved.
    :return: ``elem``, or the loaded replacement (an element, or a string
        for ``parse="text"``) when the include directive had no parent.
    :raises FatalIncludeError: on recursive includes, resources that could
        not be loaded, unknown parse types, misplaced ``xi:fallback``
        elements and unknown elements in the XInclude namespace.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Collect the matches up front: the tree is modified while they are
    # being processed.
    include_elements = list(
        elem.iter('{http://www.w3.org/2001/XInclude}*'))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                # Recurse with a *copy* of the ancestor set, so that a
                # second, sibling include of the same resource is not
                # mistaken for a loop, and with the URL of the included
                # document as base, so that its own relative hrefs are
                # resolved against it.
                node = _include(node, loader, _parent_hrefs | set([href]),
                                base_url=href)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                predecessor = e.getprevious()
                if predecessor is not None:
                    # text follows a sibling: append it to that sibling's tail
                    predecessor.tail = (predecessor.tail or "") + text
                elif parent is None:
                    return text  # replaced the root node!
                else:
                    # first child: the text becomes part of the parent's text
                    parent.text = (parent.text or "") + text + (e.tail or "")
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                    )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
|
||||
20
lib/python3.5/site-packages/lxml/__init__.py
Normal file
20
lib/python3.5/site-packages/lxml/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# this is a package
|
||||
|
||||
def get_include():
    """
    Returns a list of header include paths (for lxml itself, libxml2
    and libxslt) needed to compile C code against lxml if it was built
    with statically linked libraries.

    The list starts with the package's ``includes`` directory and the
    package directory itself, followed by every sub-directory of
    ``includes``.
    """
    import os
    package_dir = __path__[0]
    header_dir = os.path.join(package_dir, 'includes')

    entries = [os.path.join(header_dir, entry)
               for entry in os.listdir(header_dir)]
    return [header_dir, package_dir] + [
        entry for entry in entries if os.path.isdir(entry)]
|
||||
|
||||
315
lib/python3.5/site-packages/lxml/_elementpath.py
Normal file
315
lib/python3.5/site-packages/lxml/_elementpath.py
Normal file
|
|
@ -0,0 +1,315 @@
|
|||
#
|
||||
# ElementTree
|
||||
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
||||
#
|
||||
# limited xpath support for element trees
|
||||
#
|
||||
# history:
|
||||
# 2003-05-23 fl created
|
||||
# 2003-05-28 fl added support for // etc
|
||||
# 2003-08-27 fl fixed parsing of periods in element names
|
||||
# 2007-09-10 fl new selection engine
|
||||
# 2007-09-12 fl fixed parent selector
|
||||
# 2007-09-13 fl added iterfind; changed findall to return a list
|
||||
# 2007-11-30 fl added namespaces support
|
||||
# 2009-10-30 fl added child element value filter
|
||||
#
|
||||
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
||||
#
|
||||
# fredrik@pythonware.com
|
||||
# http://www.pythonware.com
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2009 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
##
|
||||
# Implementation module for XPath support. There's usually no reason
|
||||
# to import this module directly; the <b>ElementTree</b> does this for
|
||||
# you, if needed.
|
||||
##
|
||||
|
||||
import re
|
||||
|
||||
# Tokenizer for the supported path syntax.  Each match is a pair
# (operator, name): group 1 captures quoted strings and operator tokens,
# group 2 captures names, optionally qualified as "{uri}name"; whitespace
# matches without filling either group.
# Raw string literals are used so that the regex escapes (\. \( \s ...) are
# not parsed as (invalid) string escape sequences.
xpath_tokenizer_re = re.compile(
    r"("
    r"'[^']*'|\"[^\"]*\"|"
    r"::|"
    r"//?|"
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    r"\s+"
    )
|
||||
|
||||
def xpath_tokenizer(pattern, namespaces=None):
    """Yield ``(operator, name)`` token pairs for a path expression.

    Names written as ``prefix:local`` are rewritten to ``{uri}local`` using
    the ``namespaces`` prefix mapping; names that already start with ``{``
    or contain no colon are passed through unchanged.

    :raises SyntaxError: if a prefix is used that is not in ``namespaces``
        (or no mapping was given).
    """
    for operator, name in xpath_tokenizer_re.findall(pattern):
        if not name or name[0] == "{" or ":" not in name:
            # operator token, qualified name or plain name
            yield operator, name
            continue
        try:
            prefix, local_name = name.split(":", 1)
            if not namespaces:
                raise KeyError
            namespace_uri = namespaces[prefix]
        except KeyError:
            raise SyntaxError("prefix %r not found in prefix map" % prefix)
        yield operator, "{%s}%s" % (namespace_uri, local_name)
|
||||
|
||||
|
||||
def prepare_child(next, token):
    """Build the selector for a child step: children named ``token[1]``."""
    wanted = token[1]

    def select(result):
        # lazily walk the matching children of every context element
        for context in result:
            for child in context.iterchildren(wanted):
                yield child
    return select
|
||||
|
||||
def prepare_star(next, token):
    """Build the selector for a ``*`` step: all children via
    ``iterchildren('*')``."""
    def select(result):
        for context in result:
            for child in context.iterchildren('*'):
                yield child
    return select
|
||||
|
||||
def prepare_self(next, token):
    """Build the selector for a ``.`` step: the context is passed through."""
    def select(nodes):
        # identity step: hand back the incoming iterable untouched
        return nodes
    return select
|
||||
|
||||
def prepare_descendant(next, token):
    """Build the selector for a ``//`` step.

    The token following ``//`` is consumed via ``next`` and must be either
    ``*`` or a name; anything else raises ``SyntaxError``.
    """
    following = next()
    if following[0] == "*":
        wanted = "*"
    elif following[0]:
        # an operator other than '*' cannot follow '//'
        raise SyntaxError("invalid descendant")
    else:
        wanted = following[1]

    def select(result):
        for context in result:
            for descendant in context.iterdescendants(wanted):
                yield descendant
    return select
|
||||
|
||||
def prepare_parent(next, token):
    """Build the selector for a ``..`` step: the parent of each element."""
    def select(result):
        for context in result:
            owner = context.getparent()
            if owner is None:
                # elements without a parent contribute nothing
                continue
            yield owner
    return select
|
||||
|
||||
def prepare_predicate(next, token):
    """Compile a bracketed predicate into a selector function.

    Tokens are pulled from ``next`` up to the closing ``]``.  The operator
    parts of the tokens are joined into a *signature* string (``-`` stands
    for a name, ``'`` for a quoted literal) which selects the predicate
    type.  Supported forms::

        [@attribute]   [@attribute='value']
        [tag]          [tag='value']
        [index]        [last()]        [last()-index]

    :param next: callable returning the next ``(operator, name)`` token.
    :param token: the ``[`` token that triggered this call (not inspected).
    :return: a function mapping an iterable of elements to an iterator over
        the elements that satisfy the predicate.
    :raises SyntaxError: for unsupported or malformed predicates.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    signature = []
    predicate = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token[0] and token[0][:1] in "'\"":
            # quoted literal: signature char is ', value is the unquoted text
            token = "'", token[0][1:-1]
        signature.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(signature)
    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        key = predicate[1]
        def select(result):
            for elem in result:
                if elem.get(key) is not None:
                    yield elem
        return select
    if signature == "@-='":
        # [@attribute='value']
        key = predicate[1]
        value = predicate[-1]
        def select(result):
            for elem in result:
                if elem.get(key) == value:
                    yield elem
        return select
    # A lone name is a tag test unless it looks like an integer, in which
    # case it is an index (handled further down).
    if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
        # [tag]
        tag = predicate[0]
        def select(result):
            for elem in result:
                for _ in elem.iterchildren(tag):
                    # one matching child is enough
                    yield elem
                    break
        return select
    if signature == "-='" and not re.match(r"-?\d+$", predicate[0]):
        # [tag='value']
        tag = predicate[0]
        value = predicate[-1]
        def select(result):
            for elem in result:
                for e in elem.iterchildren(tag):
                    if "".join(e.itertext()) == value:
                        yield elem
                        break
        return select
    if signature == "-" or signature == "-()" or signature == "-()-":
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]; converted to a 0-based list index
            index = int(predicate[0]) - 1
            if index < 0:
                if index == -1:
                    raise SyntaxError(
                        "indices in path predicates are 1-based, not 0-based")
                else:
                    raise SyntaxError("path index >= 1 expected")
        else:
            if predicate[0] != "last":
                raise SyntaxError("unsupported function")
            if signature == "-()-":
                # the offset token carries its sign, e.g. "-1"
                try:
                    index = int(predicate[2]) - 1
                except ValueError:
                    raise SyntaxError("unsupported expression")
            else:
                index = -1
        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    elems = list(parent.iterchildren(elem.tag))
                    if elems[index] is elem:
                        yield elem
                except IndexError:
                    pass
        return select
    raise SyntaxError("invalid predicate")
|
||||
|
||||
# Dispatch table: maps the operator part of a token to the function that
# builds the selector for that step ("" is the key for plain names).
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
# Compiled selectors, keyed by (path, sorted namespace items or None).
_cache = {}


def _build_path_iterator(path, namespaces):
    """Compile a path expression into a list of selector functions.

    Results are memoised in ``_cache``; the cache is emptied once it holds
    more than 100 entries.

    :raises ValueError: if ``namespaces`` maps ``None`` or ``''``.
    :raises SyntaxError: for absolute, empty or malformed paths.
    """
    if namespaces:
        if None in namespaces or '' in namespaces:
            raise ValueError("empty namespace prefix is not supported in ElementPath")
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    if namespaces:
        namespace_key = tuple(sorted(namespaces.items())) or None
    else:
        namespace_key = None
    cache_key = (path, namespace_key)

    cached = _cache.get(cache_key)
    if cached is not None:
        return cached
    if len(_cache) > 100:
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")
    stream = iter(xpath_tokenizer(path, namespaces))
    # Python 2 iterators expose next(), Python 3 iterators __next__()
    _next = getattr(stream, 'next', None)
    if _next is None:
        _next = stream.__next__
    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")

    selector = []
    while True:
        # the step builder may consume further tokens through _next
        try:
            step = ops[token[0]](_next, token)
        except StopIteration:
            raise SyntaxError("invalid path")
        selector.append(step)
        try:
            token = _next()
            if token[0] == "/":
                # skip the separator between two steps
                token = _next()
        except StopIteration:
            break
    _cache[cache_key] = selector
    return selector
|
||||
|
||||
|
||||
##
|
||||
# Iterate over the matching nodes
|
||||
|
||||
def iterfind(elem, path, namespaces=None):
    """Return an iterator over the elements matching ``path``, starting
    from ``elem``."""
    steps = _build_path_iterator(path, namespaces)
    matches = iter((elem,))
    # each step narrows/expands the result of the previous one
    for step in steps:
        matches = step(matches)
    return matches
|
||||
|
||||
|
||||
##
|
||||
# Find first matching object.
|
||||
|
||||
def find(elem, path, namespaces=None):
    """Return the first element matching ``path``, or ``None`` if there is
    no match."""
    matches = iterfind(elem, path, namespaces)
    try:
        try:
            advance = matches.next  # Python 2 iterator protocol
        except AttributeError:
            return next(matches)
        return advance()
    except StopIteration:
        return None
|
||||
|
||||
|
||||
##
|
||||
# Find all matching objects.
|
||||
|
||||
def findall(elem, path, namespaces=None):
    """Return a list of all elements matching ``path``."""
    matches = iterfind(elem, path, namespaces)
    return list(matches)
|
||||
|
||||
|
||||
##
|
||||
# Find text for first matching object.
|
||||
|
||||
def findtext(elem, path, default=None, namespaces=None):
    """Return the text of the first element matching ``path``.

    ``default`` is returned when nothing matches; a matching element with
    no text yields ``''``.
    """
    match = find(elem, path, namespaces)
    if match is None:
        return default
    return match.text or ''
|
||||
246
lib/python3.5/site-packages/lxml/builder.py
Normal file
246
lib/python3.5/site-packages/lxml/builder.py
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
#
|
||||
# Element generator factory by Fredrik Lundh.
|
||||
#
|
||||
# Source:
|
||||
# http://online.effbot.org/2006_11_01_archive.htm#et-builder
|
||||
# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
#
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
#
|
||||
# By obtaining, using, and/or copying this software and/or its
|
||||
# associated documentation, you agree that you have read, understood,
|
||||
# and will comply with the following terms and conditions:
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and
|
||||
# its associated documentation for any purpose and without fee is
|
||||
# hereby granted, provided that the above copyright notice appears in
|
||||
# all copies, and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Secret Labs AB or the author not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
||||
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
||||
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
||||
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
||||
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
||||
# OF THIS SOFTWARE.
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
The ``E`` Element factory for generating XML documents.
|
||||
"""
|
||||
|
||||
import lxml.etree as ET
|
||||
|
||||
# Compatibility shims: each name is taken from the running interpreter when
# available and replaced by a stand-in otherwise.
try:
    from functools import partial
except ImportError:
    # fake it for pre-2.5 releases
    def partial(func, tag):
        def bound(*args, **kwargs):
            return func(tag, *args, **kwargs)
        return bound

try:
    callable
except NameError:
    # Python 3
    def callable(f):
        return hasattr(f, '__call__')

try:
    basestring
except NameError:
    # no separate base string type: plain str covers all text
    basestring = str

try:
    unicode
except NameError:
    unicode = str
|
||||
|
||||
|
||||
class ElementMaker(object):
    """Element generator factory.

    Unlike the ordinary Element factory, the E factory allows you to pass in
    more than just a tag and some optional attributes; you can also pass in
    text and other elements.  The text is added as either text or tail
    attributes, and elements are inserted at the right spot.  Some small
    examples::

        >>> from lxml import etree as ET
        >>> from lxml.builder import E

        >>> ET.tostring(E("tag"))
        '<tag/>'
        >>> ET.tostring(E("tag", "text"))
        '<tag>text</tag>'
        >>> ET.tostring(E("tag", "text", key="value"))
        '<tag key="value">text</tag>'
        >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    For simple tags, the factory also allows you to write ``E.tag(...)`` instead
    of ``E('tag', ...)``::

        >>> ET.tostring(E.tag())
        '<tag/>'
        >>> ET.tostring(E.tag("text"))
        '<tag>text</tag>'
        >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
        '<tag><subtag>text</subtag>tail</tag>'

    Here's a somewhat larger example; this shows how to generate HTML
    documents, using a mix of prepared factory functions for inline elements,
    nested ``E.tag`` calls, and embedded XHTML fragments::

        # some common inline elements
        A = E.a
        I = E.i
        B = E.b

        def CLASS(v):
            # helper function, 'class' is a reserved word
            return {'class': v}

        page = (
            E.html(
                E.head(
                    E.title("This is a sample document")
                ),
                E.body(
                    E.h1("Hello!", CLASS("title")),
                    E.p("This is a paragraph with ", B("bold"), " text in it!"),
                    E.p("This is another paragraph, with a ",
                        A("link", href="http://www.python.org"), "."),
                    E.p("Here are some reserved characters: <spam&egg>."),
                    ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"),
                )
            )
        )

        print(ET.tostring(page))

    Here's a prettyprinted version of the output from the above script::

        <html>
          <head>
            <title>This is a sample document</title>
          </head>
          <body>
            <h1 class="title">Hello!</h1>
            <p>This is a paragraph with <b>bold</b> text in it!</p>
            <p>This is another paragraph, with a <a href="http://www.python.org">link</a>.</p>
            <p>Here are some reserved characters: &lt;spam&amp;egg&gt;.</p>
            <p>And finally, here is an embedded XHTML fragment.</p>
          </body>
        </html>

    For namespace support, you can pass a namespace map (``nsmap``)
    and/or a specific target ``namespace`` to the ElementMaker class::

        >>> E = ElementMaker(namespace="http://my.ns/")
        >>> print(ET.tostring( E.test ))
        <test xmlns="http://my.ns/"/>

        >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'})
        >>> print(ET.tostring( E.test ))
        <p:test xmlns:p="http://my.ns/"/>
    """

    def __init__(self, typemap=None,
                 namespace=None, nsmap=None, makeelement=None):
        # tags are qualified by prefixing them with "{namespace}"
        self._namespace = None if namespace is None else '{' + namespace + '}'
        self._nsmap = dict(nsmap) if nsmap else None

        if makeelement is None:
            self._makeelement = ET.Element
        else:
            assert callable(makeelement)
            self._makeelement = makeelement

        # initialize type map for this element factory; a copy is used so
        # that the mapping passed in is not modified
        typemap = typemap.copy() if typemap else {}

        def add_text(elem, item):
            # text goes to the tail of the last child, or to the element's
            # own text if there are no children yet
            try:
                elem[-1].tail = (elem[-1].tail or "") + item
            except IndexError:
                elem.text = (elem.text or "") + item

        def add_cdata(elem, cdata):
            if elem.text:
                raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text)
            elem.text = cdata

        def add_dict(elem, item):
            # dictionaries set attributes; non-string values are converted
            # by the handler registered for their type
            attributes = elem.attrib
            for name, value in item.items():
                if not isinstance(value, basestring):
                    value = typemap[type(value)](None, value)
                attributes[name] = value

        # register the default handlers without overriding user entries
        for handled_type, handler in ((str, add_text),
                                      (unicode, add_text),
                                      (ET.CDATA, add_cdata),
                                      (dict, add_dict)):
            if handled_type not in typemap:
                typemap[handled_type] = handler

        self._typemap = typemap

    def __call__(self, tag, *children, **attrib):
        """Create an element named ``tag`` from children and attributes."""
        lookup = self._typemap.get

        if self._namespace is not None and tag[0] != '{':
            tag = self._namespace + tag
        elem = self._makeelement(tag, nsmap=self._nsmap)
        if attrib:
            lookup(dict)(elem, attrib)

        for child in children:
            if callable(child):
                child = child()
            handler = lookup(type(child))
            if handler is None:
                if ET.iselement(child):
                    elem.append(child)
                    continue
                for base in type(child).__mro__:
                    # See if the typemap knows of any of this type's bases.
                    handler = lookup(base)
                    if handler is not None:
                        break
                else:
                    raise TypeError("bad argument type: %s(%r)" %
                                    (type(child).__name__, child))
            converted = handler(elem, child)
            if converted:
                # a handler may return a value that is added in turn
                lookup(type(converted))(elem, converted)

        return elem

    def __getattr__(self, tag):
        """Return a factory for ``tag``, so that ``E.tag(...)`` works."""
        return partial(self, tag)
|
||||
|
||||
# create factory object
|
||||
E = ElementMaker()
|
||||
102
lib/python3.5/site-packages/lxml/cssselect.py
Normal file
102
lib/python3.5/site-packages/lxml/cssselect.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
"""CSS Selectors based on XPath.
|
||||
|
||||
This module supports selecting XML/HTML tags based on CSS selectors.
|
||||
See the `CSSSelector` class for details.
|
||||
|
||||
This is a thin wrapper around cssselect 0.7 or later.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from . import etree
|
||||
try:
|
||||
import cssselect as external_cssselect
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
'cssselect does not seem to be installed. '
|
||||
'See http://packages.python.org/cssselect/')
|
||||
|
||||
|
||||
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
|
||||
ExpressionError = external_cssselect.ExpressionError
|
||||
SelectorError = external_cssselect.SelectorError
|
||||
|
||||
|
||||
__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
|
||||
'CSSSelector']
|
||||
|
||||
|
||||
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    CSS selector to XPath translator with lxml-specific extensions.
    """
    def xpath_contains_function(self, xpath, function):
        """Translate ``:contains()`` into a case-insensitive XPath test."""
        # Defined there, removed in later drafts:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        argument_types = function.argument_types()
        if argument_types != ['STRING'] and argument_types != ['IDENT']:
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments)
        # the needle is lower-cased here, the haystack by the XPath function
        needle = function.arguments[0].value.lower()
        condition = (
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(needle))
        return xpath.add_condition(condition)
|
||||
|
||||
|
||||
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    Combines the lxml extensions of ``LxmlTranslator`` with the HTML
    support of ``cssselect.HTMLTranslator``.
    """
|
||||
|
||||
|
||||
def _make_lower_case(context, s):
|
||||
return s.lower()
|
||||
|
||||
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
|
||||
ns.prefix = '__lxml_internal_css'
|
||||
ns['lower-case'] = _make_lower_case
|
||||
|
||||
|
||||
class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    """
    def __init__(self, css, namespaces=None, translator='xml'):
        # The names 'xml', 'html' and 'xhtml' select a built-in translator;
        # any other value is used as the translator object itself.
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        xpath_expression = translator.css_to_xpath(css)
        etree.XPath.__init__(self, xpath_expression, namespaces=namespaces)
        # keep the original selector text for __repr__
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        address = hex(abs(id(self)))[2:]
        return '<%s %s for %r>' % (class_name, address, self.css)
|
||||
508
lib/python3.5/site-packages/lxml/doctestcompare.py
Normal file
508
lib/python3.5/site-packages/lxml/doctestcompare.py
Normal file
|
|
@ -0,0 +1,508 @@
|
|||
"""
|
||||
lxml-based doctest output comparison.
|
||||
|
||||
Note: normally, you should just import the `lxml.usedoctest` and
|
||||
`lxml.html.usedoctest` modules from within a doctest, instead of this
|
||||
one::
|
||||
|
||||
>>> import lxml.usedoctest # for XML output
|
||||
|
||||
>>> import lxml.html.usedoctest # for HTML output
|
||||
|
||||
To use this module directly, you must call ``lxmldoctest.install()``,
|
||||
which will cause doctest to use this in all subsequent calls.
|
||||
|
||||
This changes the way output is checked and comparisons are made for
|
||||
XML or HTML-like content.
|
||||
|
||||
XML or HTML content is noticed because the example starts with ``<``
|
||||
(it's HTML if it starts with ``<html``). You can also use the
|
||||
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
|
||||
|
||||
Some rough wildcard-like things are allowed. Whitespace is generally
|
||||
ignored (except in attributes). In text (attributes and text in the
|
||||
body) you can use ``...`` as a wildcard. In an example it also
|
||||
matches any trailing tags in the element, though it does not match
|
||||
leading tags. You may create a tag ``<any>`` or include an ``any``
|
||||
attribute in the tag. An ``any`` tag matches any tag, while the
|
||||
attribute matches any and all attributes.
|
||||
|
||||
When a match fails, the reformatted example and gotten text is
|
||||
displayed (indented), and a rough diff-like output is given. Anything
|
||||
marked with ``+`` is in the output but wasn't supposed to be, and
|
||||
similarly ``-`` means it's in the example but wasn't in the output.
|
||||
|
||||
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
|
||||
"""
|
||||
|
||||
from lxml import etree
|
||||
import sys
|
||||
import re
|
||||
import doctest
|
||||
try:
|
||||
from html import escape as html_escape
|
||||
except ImportError:
|
||||
from cgi import escape as html_escape
|
||||
|
||||
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
|
||||
'LHTMLOutputChecker', 'install', 'temp_install']
|
||||
|
||||
try:
|
||||
_basestring = basestring
|
||||
except NameError:
|
||||
_basestring = (str, bytes)
|
||||
|
||||
_IS_PYTHON_3 = sys.version_info[0] >= 3
|
||||
|
||||
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
|
||||
PARSE_XML = doctest.register_optionflag('PARSE_XML')
|
||||
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
|
||||
|
||||
OutputChecker = doctest.OutputChecker
|
||||
|
||||
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
|
||||
|
||||
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in *v* with one space."""
    collapse = _norm_whitespace_re.sub
    return collapse(' ', v)
|
||||
|
||||
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
|
||||
|
||||
def html_fromstring(html):
    """Parse *html* using the module-level ``_html_parser`` and return the result."""
    return etree.fromstring(html, parser=_html_parser)
|
||||
|
||||
# We use this to distinguish repr()s from elements:
|
||||
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
|
||||
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
|
||||
|
||||
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares markup structurally.

    When both the expected and the actual output look like XML/HTML they
    are parsed and compared element by element (see ``compare_docs``);
    otherwise the check is delegated to the stock doctest checker.
    """

    # Tags that ``html_empty_tag`` treats as content-less in HTML mode.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used when markup is detected without a flag."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches *want* under *optionflags*.

        Falls back to the plain doctest comparison when ``get_parser``
        selects no parser; returns False if either side fails to parse.
        """
        # NOTE: temp_install() copies this method's code object onto
        # doctest's own check function, so this body is kept exactly as
        # written and must only rely on globals that are also available in
        # the ``doctest`` module (``OutputChecker``, and ``etree`` which
        # temp_install() injects there).
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Pick the parser for this example, or None for a plain comparison.

        Explicit option flags win; otherwise the parser is sniffed from
        the leading text of both *want* and *got*.
        """
        parser = None
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            parser = html_fromstring
        elif PARSE_XML & optionflags:
            parser = etree.XML
        elif (want.strip().lower().startswith('<html')
              # Sniff both sides case-insensitively; previously only
              # ``want`` was lower-cased.
              and got.strip().lower().startswith('<html')):
            parser = html_fromstring
        elif (self._looks_like_markup(want)
              and self._looks_like_markup(got)):
            parser = self.get_default_parser()
        return parser

    def _looks_like_markup(self, s):
        """Return True if *s* starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare two elements, honouring the wildcards.

        Tag, text, tail and attributes must match (an ``any`` attribute on
        *want* skips the attribute check).  Children are compared pairwise
        unless *want* has text ``...`` and no children; a ``...`` tail on
        the last compared *want* child ends the comparison once *got* has
        no children left.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                if not got_children and want_first.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if *got* matches *want*, where ``...`` is a wildcard.

        None is treated as the empty string.  With *strip* true both
        sides are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped ellipsis into ``.*``.
        want = '^%s$' % re.escape(want)
        want = want.replace(r'\.\.\.', '.*')
        if re.search(want, got):
            return True
        else:
            return False

    def tag_compare(self, want, got):
        """Return True if tag *got* satisfies expected tag *want*.

        ``any`` matches everything; a ``{...}`` prefix on *want* ignores
        the namespace part; non-string tags are compared with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, _basestring)
                or not isinstance(got, _basestring)):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        else:
            return want == got

    def output_difference(self, example, got, optionflags):
        """Return the failure report for *example* given actual *got*.

        If no parser applies, or either side fails to parse, the stock
        doctest report is returned (prefixed by the parse errors, if any).
        Otherwise the report shows both documents and a rough diff.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError:
                e = sys.exc_info()[1]
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            else:
                return value
        html = parser is html_fromstring
        diff_parts = []
        diff_parts.append('Expected:')
        diff_parts.append(self.format_doc(want_doc, html, 2))
        diff_parts.append('Got:')
        diff_parts.append(self.format_doc(got_doc, html, 2))
        diff_parts.append('Diff:')
        diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* is a content-less tag from ``empty_tags``.

        Always False when *html* is false.
        """
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Render element *doc* and its subtree as indented text.

        *indent* is the number of leading spaces; *prefix* is inserted
        before the start tag of a childless element (used for ``+``/``-``
        markers in diffs).
        """
        parts = []
        if not len(doc):
            # No children...
            parts.append(' '*indent)
            parts.append(prefix)
            parts.append(self.format_tag(doc))
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(prefix)
        parts.append(self.format_tag(doc))
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(' '*indent)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent+2))
            parts.append(' '*indent)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(' '*indent)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (quotes included); None becomes ''."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of *el* with attributes sorted by name."""
        attrs = []
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        for name, value in sorted(el.attrib.items()):
            attrs.append('%s="%s"' % (name, self.format_text(value, False)))
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of *el* (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Render a rough diff of elements *want* and *got*.

        Children are paired positionally; surplus *got* children are
        marked ``+`` and surplus *want* children ``-``.
        """
        parts = []
        if not len(want) and not len(got):
            parts.append(' '*indent)
            parts.append(self.collect_diff_tag(want, got))
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append(' '*indent)
        parts.append(self.collect_diff_tag(want, got))
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        while want_children or got_children:
            if not want_children:
                parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+'))
                continue
            if not got_children:
                parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-'))
                continue
            parts.append(self.collect_diff(
                want_children.pop(0), got_children.pop(0), html, indent+2))
        parts.append(' '*indent)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(' '*indent)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag for a diff, flagging tag/attribute changes.

        Attributes only in *got* are prefixed ``+``, attributes only in
        *want* are prefixed ``-`` (both suppressed by the ``any`` wildcard).
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # Named to avoid shadowing the ``any`` builtin.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag for a diff, flagging a tag mismatch.

        Uses ``tag_compare`` (like ``collect_diff_tag``) so that the
        ``any`` and ``{...}`` wildcards are not reported as mismatches.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return escaped *got*, or ``want (got: got)`` if they differ."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
|
||||
|
||||
class LHTMLOutputChecker(LXMLOutputChecker):
    """Variant of ``LXMLOutputChecker`` whose default parser is the HTML one."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the default parser."""
        return html_fromstring
|
||||
|
||||
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    A true *html* selects the HTML-defaulting checker; otherwise the
    XML-defaulting checker is installed.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
|
||||
|
||||
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    ``del_module``, if given, names a module that ``_RestoreChecker``
    removes from ``sys.modules`` when the checker is restored.
    """
    if html:
        Checker = LHTMLOutputChecker
    else:
        Checker = LXMLOutputChecker
    # Locate the doctest runner frame and its runner instance ('self').
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the func_code
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    if _IS_PYTHON_3:
        check_func = frame.f_locals['check'].__func__
        checker_check_func = checker.check_output.__func__
    else:
        check_func = frame.f_locals['check'].im_func
        checker_check_func = checker.check_output.im_func
    # Because we can't patch up func_globals, this is the only global
    # in check_output that we care about:
    doctest.etree = etree
    # The constructor performs the patching; the instance stays reachable
    # because it installs itself as the runner's __record_outcome hook.
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
|
||||
|
||||
class _RestoreChecker(object):
    """Installs the temporary checker patch and undoes it afterwards.

    Constructing an instance swaps the code object of the runner's check
    function and replaces the runner's ``__record_outcome`` with the
    instance itself; calling the instance reverts both.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # check_output() looks these two attributes up on the old checker
        # to redirect to the new checker and to reach the original code.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save check_func's code object and replace it with clone_func's."""
        if _IS_PYTHON_3:
            self.func_code = self.check_func.__code__
            self.func_globals = self.check_func.__globals__
            self.check_func.__code__ = self.clone_func.__code__
        else:
            self.func_code = self.check_func.func_code
            self.func_globals = self.check_func.func_globals
            self.check_func.func_code = self.clone_func.func_code

    def uninstall_clone(self):
        """Put the saved code object back on check_func."""
        if _IS_PYTHON_3:
            self.check_func.__code__ = self.func_code
        else:
            self.check_func.func_code = self.func_code

    def install_dt_self(self):
        """Replace the runner's __record_outcome with this instance."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's previous __record_outcome."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Remove ``del_module`` from sys.modules and its parent package."""
        if self.del_module:
            import sys
            del sys.modules[self.del_module]
            if '.' in self.del_module:
                package, module = self.del_module.rsplit('.', 1)
                package_mod = sys.modules[package]
                delattr(package_mod, module)

    def __call__(self, *args, **kw):
        """Undo all patches, then forward to the original __record_outcome."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        result = self.prev_func(*args, **kw)
        self.uninstall_module()
        return result

    def call_super(self, *args, **kw):
        """Run the original check code, re-installing the clone afterwards."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
|
||||
|
||||
def _find_doctest_frame():
|
||||
import sys
|
||||
frame = sys._getframe(1)
|
||||
while frame:
|
||||
l = frame.f_locals
|
||||
if 'BOOM' in l:
|
||||
# Sign of doctest
|
||||
return frame
|
||||
frame = frame.f_back
|
||||
raise LookupError(
|
||||
"Could not find doctest (only use this function *inside* a doctest)")
|
||||
|
||||
__test__ = {
|
||||
'basic': '''
|
||||
>>> temp_install()
|
||||
>>> print """<xml a="1" b="2">stuff</xml>"""
|
||||
<xml b="2" a="1">...</xml>
|
||||
>>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
|
||||
<xml xmlns="...">
|
||||
<tag attr="..." />
|
||||
</xml>
|
||||
>>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
|
||||
<xml>...foo /></xml>
|
||||
'''}
|
||||
|
||||
if __name__ == '__main__':
|
||||
import doctest
|
||||
doctest.testmod()
|
||||
|
||||
|
||||
BIN
lib/python3.5/site-packages/lxml/etree.cpython-35m-darwin.so
Executable file
BIN
lib/python3.5/site-packages/lxml/etree.cpython-35m-darwin.so
Executable file
Binary file not shown.
10
lib/python3.5/site-packages/lxml/html/ElementSoup.py
Normal file
10
lib/python3.5/site-packages/lxml/html/ElementSoup.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
"""

__all__ = ["parse", "convert_tree"]

# Explicit relative import: the implicit form ("from soupparser import ...")
# does not resolve sibling modules on Python 3.
from .soupparser import convert_tree, parse as _parse


def parse(file, beautifulsoup=None, makeelement=None):
    """Parse *file* via ``soupparser.parse`` and return ``getroot()`` of the result."""
    root = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement)
    return root.getroot()
|
||||
1923
lib/python3.5/site-packages/lxml/html/__init__.py
Normal file
1923
lib/python3.5/site-packages/lxml/html/__init__.py
Normal file
File diff suppressed because it is too large
Load diff
87
lib/python3.5/site-packages/lxml/html/_diffcommand.py
Normal file
87
lib/python3.5/site-packages/lxml/html/_diffcommand.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import optparse
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
from lxml.html.diff import htmldiff
|
||||
|
||||
# No descriptive text is shown in --help at the moment.
description = """\
"""

# Command-line interface.  The usage text names the option exactly as it is
# registered below (--annotation); it previously advertised "--annotate",
# which the parser does not accept.
parser = optparse.OptionParser(
    usage="%prog [OPTIONS] FILE1 FILE2\n"
    "%prog --annotation [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
    description=description,
    )

parser.add_option(
    '-o', '--output',
    metavar="FILE",
    dest="output",
    default="-",
    help="File to write the difference to",
    )

parser.add_option(
    '-a', '--annotation',
    action="store_true",
    dest="annotation",
    help="Do an annotation")
|
||||
|
||||
def main(args=None):
    """Command-line entry point: diff the bodies of two HTML files.

    *args* defaults to ``sys.argv[1:]``.  The diff of the two ``<body>``
    contents is wrapped in the second file's surrounding markup and written
    to stdout (``-o -``, the default) or to the ``--output`` file.  Exits
    with status 1 unless exactly two files are given.
    """
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print('Error: you must give two files')
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        # Context manager so the handle is closed even if the write fails.
        # NOTE(review): the file is opened in binary mode; confirm that
        # ``result`` is bytes on the Python versions this must support.
        with open(options.output, 'wb') as f:
            f.write(result)
|
||||
|
||||
def read_file(filename):
    """Return the contents of *filename*, or of stdin when it is ``-``.

    Files are read in binary mode.  Raises OSError if the file does not
    exist.
    """
    if filename == '-':
        c = sys.stdin.read()
    elif not os.path.exists(filename):
        raise OSError(
            "Input file %s does not exist" % filename)
    else:
        # Context manager so the handle is closed even if read() fails.
        with open(filename, 'rb') as f:
            c = f.read()
    return c
|
||||
|
||||
# Opening and closing <body> tags, case-insensitive, attributes allowed.
body_start_re = re.compile(
    r"<body.*?>", re.I|re.S)
body_end_re = re.compile(
    r"</body.*?>", re.I|re.S)


def split_body(html):
    """Split *html* into ``(pre, body, post)`` around the ``<body>`` element.

    ``pre`` runs through the opening ``<body ...>`` tag and ``post`` starts
    at ``</body>``.  Either is the empty string when the corresponding tag
    is absent, so a bare fragment comes back as ``('', html, '')``.
    """
    # Defaults for input without <body> / </body>; without them the return
    # statement raised UnboundLocalError for such input.
    pre = post = ''
    match = body_start_re.search(html)
    if match:
        pre = html[:match.end()]
        html = html[match.end():]
    match = body_end_re.search(html)
    if match:
        post = html[match.start():]
        html = html[:match.start()]
    return pre, html, post
|
||||
|
||||
def annotate(options, args):
    """Report that annotation mode is unimplemented and exit with status 1."""
    print("Not yet implemented")
    raise SystemExit(1)
|
||||
|
||||
100
lib/python3.5/site-packages/lxml/html/_html5builder.py
Normal file
100
lib/python3.5/site-packages/lxml/html/_html5builder.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""
|
||||
Legacy module - don't use in new code!
|
||||
|
||||
html5lib now has its own proper implementation.
|
||||
|
||||
This module implements a tree builder for html5lib that generates lxml
|
||||
html element trees. This module uses camelCase as it follows the
|
||||
html5lib style guide.
|
||||
"""
|
||||
|
||||
from html5lib.treebuilders import _base, etree as etree_builders
|
||||
from lxml import html, etree
|
||||
|
||||
|
||||
class DocumentType(object):
    """Plain holder for a doctype's name, public id and system id."""

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
||||
|
||||
class Document(object):
    """Document node that holds an element tree and a list of child nodes."""

    def __init__(self):
        self.childNodes = []
        self._elementTree = None

    def appendChild(self, element):
        """Add *element*'s wrapped node as the next sibling of the tree root."""
        root = self._elementTree.getroot()
        root.addnext(element._element)
|
||||
|
||||
|
||||
class TreeBuilder(_base.TreeBuilder):
    """html5lib tree builder that produces lxml.html element trees."""

    documentClass = Document
    doctypeClass = DocumentType
    # elementClass and commentClass are filled in per instance by __init__.
    elementClass = None
    commentClass = None
    fragmentClass = Document

    def __init__(self, *args, **kwargs):
        # Elements come from the lxml.html-backed builder module, comments
        # from the plain lxml.etree-backed one.
        html_builder = etree_builders.getETreeModule(html, fullTree=False)
        etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
        self.elementClass = html_builder.Element
        self.commentClass = etree_builder.Comment
        _base.TreeBuilder.__init__(self, *args, **kwargs)

    def reset(self):
        """Reset the base builder and this builder's root/doctype state."""
        _base.TreeBuilder.reset(self)
        self.rootInserted = False
        self.initialComments = []
        self.doctype = None

    def getDocument(self):
        """Return the element tree stored on the current document."""
        return self.document._elementTree

    def getFragment(self):
        """Return the first open element's text, children and tail as a list."""
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(element.getchildren())
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, name, publicId, systemId):
        """Remember the doctype; it is emitted later by insertRoot()."""
        doctype = self.doctypeClass(name, publicId, systemId)
        self.doctype = doctype

    def insertComment(self, data, parent=None):
        """Insert a comment, buffering it if the root does not exist yet."""
        if not self.rootInserted:
            self.initialComments.append(data)
        else:
            _base.TreeBuilder.insertComment(self, data, parent)

    def insertRoot(self, name):
        """Create the document and root element, with doctype and comments."""
        # Build a minimal document string (optional doctype + empty <html>)
        # and let lxml.html parse it to obtain the root.
        buf = []
        if self.doctype and self.doctype.name:
            buf.append('<!DOCTYPE %s' % self.doctype.name)
            if self.doctype.publicId is not None or self.doctype.systemId is not None:
                buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId,
                                                  self.doctype.systemId))
            buf.append('>')
        buf.append('<html></html>')
        root = html.fromstring(''.join(buf))

        # Append the initial comments:
        for comment in self.initialComments:
            root.addprevious(etree.Comment(comment))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name)
        root_element._element = root
        self.document.childNodes.append(root_element)
        self.openElements.append(root_element)

        self.rootInserted = True
|
||||
52
lib/python3.5/site-packages/lxml/html/_setmixin.py
Normal file
52
lib/python3.5/site-packages/lxml/html/_setmixin.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
try:
    # The ABCs live in collections.abc; the aliases in ``collections``
    # were removed in Python 3.10.
    from collections.abc import MutableSet
except ImportError:
    # Python 2
    from collections import MutableSet


class SetMixin(MutableSet):

    """
    Mix-in for sets.  You must define __iter__, add, remove
    """

    def __len__(self):
        """Return the number of items, counted by iterating."""
        length = 0
        for item in self:
            length += 1
        return length

    def __contains__(self, item):
        """Return True if some iterated item equals *item*."""
        for has_item in self:
            if item == has_item:
                return True
        return False

    # Named-method spellings of the operators provided by MutableSet.
    issubset = MutableSet.__le__
    issuperset = MutableSet.__ge__

    union = MutableSet.__or__
    intersection = MutableSet.__and__
    difference = MutableSet.__sub__
    symmetric_difference = MutableSet.__xor__

    def copy(self):
        """Return the items as a plain built-in ``set``."""
        return set(self)

    def update(self, other):
        """Add every item of *other* in place."""
        self |= other

    def intersection_update(self, other):
        """Keep only the items that are also in *other*."""
        self &= other

    def difference_update(self, other):
        """Remove every item that is in *other*."""
        self -= other

    def symmetric_difference_update(self, other):
        """Keep the items that are in exactly one of self and *other*."""
        self ^= other

    def discard(self, item):
        """Remove *item*, ignoring the KeyError raised when it is absent."""
        try:
            self.remove(item)
        except KeyError:
            pass

    @classmethod
    def _from_iterable(cls, it):
        """Build operator results as plain built-in sets."""
        return set(it)
|
||||
133
lib/python3.5/site-packages/lxml/html/builder.py
Normal file
133
lib/python3.5/site-packages/lxml/html/builder.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
# --------------------------------------------------------------------
|
||||
# The ElementTree toolkit is
|
||||
# Copyright (c) 1999-2004 by Fredrik Lundh
|
||||
# --------------------------------------------------------------------
|
||||
|
||||
"""
|
||||
A set of HTML generator tags for building HTML documents.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> from lxml.html.builder import *
|
||||
>>> html = HTML(
|
||||
... HEAD( TITLE("Hello World") ),
|
||||
... BODY( CLASS("main"),
|
||||
... H1("Hello World !")
|
||||
... )
|
||||
... )
|
||||
|
||||
>>> import lxml.etree
|
||||
>>> print lxml.etree.tostring(html, pretty_print=True)
|
||||
<html>
|
||||
<head>
|
||||
<title>Hello World</title>
|
||||
</head>
|
||||
<body class="main">
|
||||
<h1>Hello World !</h1>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
from lxml.builder import ElementMaker
|
||||
from lxml.html import html_parser
|
||||
|
||||
# Element factory that creates elements through the HTML parser's makeelement.
E = ElementMaker(makeelement=html_parser.makeelement)

# elements
A = E.a  # anchor
ABBR = E.abbr  # abbreviated form (e.g., WWW, HTTP, etc.)
ACRONYM = E.acronym  # acronym
ADDRESS = E.address  # information on author
APPLET = E.applet  # Java applet (DEPRECATED)
AREA = E.area  # client-side image map area
B = E.b  # bold text style
BASE = E.base  # document base URI
BASEFONT = E.basefont  # base font size (DEPRECATED)
BDO = E.bdo  # I18N BiDi over-ride
BIG = E.big  # large text style
BLOCKQUOTE = E.blockquote  # long quotation
BODY = E.body  # document body
BR = E.br  # forced line break
BUTTON = E.button  # push button
CAPTION = E.caption  # table caption
CENTER = E.center  # shorthand for DIV align=center (DEPRECATED)
CITE = E.cite  # citation
CODE = E.code  # computer code fragment
COL = E.col  # table column
COLGROUP = E.colgroup  # table column group
DD = E.dd  # definition description
DEL = getattr(E, 'del')  # deleted text ('del' is a Python keyword)
DFN = E.dfn  # instance definition
DIR = E.dir  # directory list (DEPRECATED)
DIV = E.div  # generic language/style container
DL = E.dl  # definition list
DT = E.dt  # definition term
EM = E.em  # emphasis
FIELDSET = E.fieldset  # form control group
FONT = E.font  # local change to font (DEPRECATED)
FORM = E.form  # interactive form
FRAME = E.frame  # subwindow
FRAMESET = E.frameset  # window subdivision
H1 = E.h1  # heading
H2 = E.h2  # heading
H3 = E.h3  # heading
H4 = E.h4  # heading
H5 = E.h5  # heading
H6 = E.h6  # heading
HEAD = E.head  # document head
HR = E.hr  # horizontal rule
HTML = E.html  # document root element
I = E.i  # italic text style
IFRAME = E.iframe  # inline subwindow
IMG = E.img  # Embedded image
INPUT = E.input  # form control
INS = E.ins  # inserted text
ISINDEX = E.isindex  # single line prompt (DEPRECATED)
KBD = E.kbd  # text to be entered by the user
LABEL = E.label  # form field label text
LEGEND = E.legend  # fieldset legend
LI = E.li  # list item
LINK = E.link  # a media-independent link
MAP = E.map  # client-side image map
MENU = E.menu  # menu list (DEPRECATED)
META = E.meta  # generic metainformation
NOFRAMES = E.noframes  # alternate content container for non frame-based rendering
NOSCRIPT = E.noscript  # alternate content container for non script-based rendering
OBJECT = E.object  # generic embedded object
OL = E.ol  # ordered list
OPTGROUP = E.optgroup  # option group
OPTION = E.option  # selectable choice
P = E.p  # paragraph
PARAM = E.param  # named property value
PRE = E.pre  # preformatted text
Q = E.q  # short inline quotation
S = E.s  # strike-through text style (DEPRECATED)
SAMP = E.samp  # sample program output, scripts, etc.
SCRIPT = E.script  # script statements
SELECT = E.select  # option selector
SMALL = E.small  # small text style
SPAN = E.span  # generic language/style container
STRIKE = E.strike  # strike-through text (DEPRECATED)
STRONG = E.strong  # strong emphasis
STYLE = E.style  # style info
SUB = E.sub  # subscript
SUP = E.sup  # superscript
TABLE = E.table  # table
TBODY = E.tbody  # table body
TD = E.td  # table data cell
TEXTAREA = E.textarea  # multi-line text field
TFOOT = E.tfoot  # table footer
TH = E.th  # table header cell
THEAD = E.thead  # table header
TITLE = E.title  # document title
TR = E.tr  # table row
TT = E.tt  # teletype or monospaced text style
U = E.u  # underlined text style (DEPRECATED)
UL = E.ul  # unordered list
VAR = E.var  # instance of a variable or program argument

# attributes (only reserved words are included here)
ATTR = dict
|
||||
def CLASS(v):
    """Return a one-item attribute dict mapping ``'class'`` to *v*."""
    attrs = {}
    attrs['class'] = v
    return attrs
|
||||
def FOR(v):
    """Return a one-item attribute dict mapping ``'for'`` to *v*."""
    attrs = {}
    attrs['for'] = v
    return attrs
|
||||
732
lib/python3.5/site-packages/lxml/html/clean.py
Normal file
732
lib/python3.5/site-packages/lxml/html/clean.py
Normal file
|
|
@ -0,0 +1,732 @@
|
|||
"""A cleanup tool for HTML.
|
||||
|
||||
Removes unwanted tags and content. See the `Cleaner` class for
|
||||
details.
|
||||
"""
|
||||
|
||||
import re
|
||||
import copy
|
||||
try:
|
||||
from urlparse import urlsplit
|
||||
except ImportError:
|
||||
# Python 3
|
||||
from urllib.parse import urlsplit
|
||||
from lxml import etree
|
||||
from lxml.html import defs
|
||||
from lxml.html import fromstring, tostring, XHTML_NAMESPACE
|
||||
from lxml.html import xhtml_to_html, _transform_result
|
||||
|
||||
try:
|
||||
unichr
|
||||
except NameError:
|
||||
# Python 3
|
||||
unichr = chr
|
||||
try:
|
||||
unicode
|
||||
except NameError:
|
||||
# Python 3
|
||||
unicode = str
|
||||
try:
|
||||
bytes
|
||||
except NameError:
|
||||
# Python < 2.6
|
||||
bytes = str
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
basestring = (str, bytes)
|
||||
|
||||
|
||||
__all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html',
|
||||
'word_break', 'word_break_html']
|
||||
|
||||
# Look at http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl
|
||||
# Particularly the CSS cleaning; most of the tag cleaning is integrated now
|
||||
# I have multiple kinds of schemes searched; but should schemes be
|
||||
# whitelisted instead?
|
||||
# max height?
|
||||
# remove images? Also in CSS? background attribute?
|
||||
# Some way to whitelist object, iframe, etc (e.g., if you want to
|
||||
# allow *just* embedded YouTube movies)
|
||||
# Log what was deleted and why?
|
||||
# style="behavior: ..." might be bad in IE?
|
||||
# Should we have something for just <meta http-equiv>? That's the worst of the
|
||||
# metas.
|
||||
# UTF-7 detections? Example:
|
||||
# <HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-
|
||||
# you don't always have to have the charset set, if the page has no charset
|
||||
# and there's UTF7-like code in it.
|
||||
# Look at these tests: http://htmlpurifier.org/live/smoketests/xssAttacks.php
|
||||
|
||||
|
||||
# This is an IE-specific construct you can have in a stylesheet to
|
||||
# run some Javascript:
|
||||
_css_javascript_re = re.compile(
|
||||
r'expression\s*\(.*?\)', re.S|re.I)
|
||||
|
||||
# Do I have to worry about @\nimport?
|
||||
_css_import_re = re.compile(
|
||||
r'@\s*import', re.I)
|
||||
|
||||
# All kinds of schemes besides just javascript: that can cause
# execution:
_is_image_dataurl = re.compile(
    r'^data:image/.+;base64', re.I).search
# "Images" whose media type mentions SVG or XML can carry script, so a
# data: URL of that kind must never be waved through as a harmless image.
_is_unsafe_image_type = re.compile(r'svg|xml', re.I).search
_is_possibly_malicious_scheme = re.compile(
    r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):',
    re.I).search


def _is_javascript_scheme(s):
    """
    Tell whether the URL ``s`` uses a scheme that may execute code.

    Returns ``None`` for base64 ``data:image/...`` URLs of a non-SVG,
    non-XML type, which are treated as safe.  Otherwise returns the
    result of searching ``s`` for one of the dangerous schemes: a match
    object (true) when one is present anywhere in the string, ``None``
    when not.

    The only caller visible here strips whitespace from ``s`` first.
    """
    image = _is_image_dataurl(s)
    if image is not None and not _is_unsafe_image_type(image.group(0)):
        # Only the matched ``data:image/...;base64`` prefix is inspected,
        # not the payload that follows it.
        return None
    return _is_possibly_malicious_scheme(s)
|
||||
|
||||
# Bound ``sub`` of a pattern matching runs of whitespace and low control
# characters; used to undo obfuscation such as "j a v a s c r i p t:".
_substitute_whitespace = re.compile(
    r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+',
).sub
# FIXME: should data: be blocked?

# Matches the opening of an IE conditional comment, e.g. ``[if IE]>``.
# FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx
_conditional_comment_re = re.compile(
    r'\[if[\s\n\r]+.*?][\s\n\r]*>',
    re.S | re.I,
)
|
||||
|
||||
# Selects every element (the context node included) that carries a
# ``style`` attribute.
_find_styled_elements = etree.XPath("descendant-or-self::*[@style]")

# Selects <a> elements, plain or in the XHTML namespace, whose href is
# non-empty after whitespace normalisation and does not start with '#'.
_find_external_links = etree.XPath(
    (
        "descendant-or-self::a [normalize-space(@href) and substring(normalize-space(@href),1,1) != '#'] |"
        "descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']"
    ),
    namespaces={'x': XHTML_NAMESPACE},
)
|
||||
|
||||
class Cleaner(object):
    """
    Instances clean the document of each of the possible offending
    elements.  The cleaning is controlled by attributes; you can
    override attributes in a subclass, or set them in the constructor.

    ``scripts``:
        Removes any ``<script>`` tags.

    ``javascript``:
        Removes any Javascript, like an ``onclick`` attribute. Also removes stylesheets
        as they could contain Javascript.

    ``comments``:
        Removes any comments.

    ``style``:
        Removes any style tags or attributes.

    ``links``:
        Removes any ``<link>`` tags

    ``meta``:
        Removes any ``<meta>`` tags

    ``page_structure``:
        Structural parts of a page: ``<head>``, ``<html>``, ``<title>``.

    ``processing_instructions``:
        Removes any processing instructions.

    ``embedded``:
        Removes any embedded objects (flash, iframes)

    ``frames``:
        Removes any frame-related tags

    ``forms``:
        Removes any form tags

    ``annoying_tags``:
        Tags that aren't *wrong*, but are annoying.  ``<blink>`` and ``<marquee>``

    ``remove_tags``:
        A list of tags to remove.  Only the tags will be removed,
        their content will get pulled up into the parent tag.

    ``kill_tags``:
        A list of tags to kill.  Killing also removes the tag's content,
        i.e. the whole subtree, not just the tag itself.

    ``allow_tags``:
        A list of tags to include (default include all).

    ``remove_unknown_tags``:
        Remove any tags that aren't standard parts of HTML.

    ``safe_attrs_only``:
        If true, only include 'safe' attributes (specifically the list
        from the feedparser HTML sanitisation web site).

    ``safe_attrs``:
        A set of attribute names to override the default list of attributes
        considered 'safe' (when safe_attrs_only=True).

    ``add_nofollow``:
        If true, then any <a> tags will have ``rel="nofollow"`` added to them.

    ``host_whitelist``:
        A list or set of hosts that you can use for embedded content
        (for content like ``<object>``, ``<link rel="stylesheet">``, etc).
        You can also implement/override the method
        ``allow_embedded_url(el, url)`` or ``allow_element(el)`` to
        implement more complex rules for what can be embedded.
        Anything that passes this test will be shown, regardless of
        the value of (for instance) ``embedded``.

        Note that this parameter might not work as intended if you do not
        make the links absolute before doing the cleaning.

        Note that you may also need to set ``whitelist_tags``.

    ``whitelist_tags``:
        A set of tags that can be included with ``host_whitelist``.
        The default is ``iframe`` and ``embed``; you may wish to
        include other tags like ``script``, or you may want to
        implement ``allow_embedded_url`` for more control.  Set to None to
        include all tags.

    This modifies the document *in place*.
    """

    scripts = True
    javascript = True
    comments = True
    style = False
    links = True
    meta = True
    page_structure = True
    processing_instructions = True
    embedded = True
    frames = True
    forms = True
    annoying_tags = True
    remove_tags = None
    allow_tags = None
    kill_tags = None
    remove_unknown_tags = True
    safe_attrs_only = True
    safe_attrs = defs.safe_attrs
    add_nofollow = False
    host_whitelist = ()
    whitelist_tags = set(['iframe', 'embed'])

    def __init__(self, **kw):
        """
        Override any of the class-level options by keyword.

        Raises ``TypeError`` for a keyword that does not name an existing
        attribute of the cleaner.
        """
        for name, value in kw.items():
            if not hasattr(self, name):
                raise TypeError(
                    "Unknown parameter: %s=%r" % (name, value))
            setattr(self, name, value)

    # Used to lookup the primary URL for a given tag that is up for
    # removal:
    _tag_link_attrs = dict(
        script='src',
        link='href',
        # From: http://java.sun.com/j2se/1.4.2/docs/guide/misc/applet.html
        # From what I can tell, both attributes can contain a link:
        applet=['code', 'object'],
        iframe='src',
        embed='src',
        layer='src',
        # FIXME: there doesn't really seem like a general way to figure out what
        # links an <object> tag uses; links often go in <param> tags with values
        # that we don't really know.  You'd have to have knowledge about specific
        # kinds of plugins (probably keyed off classid), and match against those.
        ##object=?,
        # FIXME: not looking at the action currently, because it is more complex
        # than that -- if you keep the form, you should keep the form controls.
        ##form='action',
        a='href',
        )

    def __call__(self, doc):
        """
        Cleans the document.

        ``doc`` is an element, or anything with a ``getroot()`` method
        (an ElementTree), and is modified in place.  Raises ``ValueError``
        when both ``allow_tags`` and ``remove_unknown_tags`` are set.
        """
        if hasattr(doc, 'getroot'):
            # ElementTree instance, instead of an element
            doc = doc.getroot()
        # convert XHTML to HTML
        xhtml_to_html(doc)
        # Normalize a case that IE treats <image> like <img>, and that
        # can confuse either this step or later steps.
        for el in doc.iter('image'):
            el.tag = 'img'
        if not self.comments:
            # Of course, if we were going to kill comments anyway, we don't
            # need to worry about this
            self.kill_conditional_comments(doc)

        kill_tags = set(self.kill_tags or ())
        remove_tags = set(self.remove_tags or ())
        allow_tags = set(self.allow_tags or ())

        if self.scripts:
            kill_tags.add('script')
        if self.safe_attrs_only:
            safe_attrs = set(self.safe_attrs)
            for el in doc.iter(etree.Element):
                attrib = el.attrib
                for aname in attrib.keys():
                    if aname not in safe_attrs:
                        del attrib[aname]
        if self.javascript:
            if not (self.safe_attrs_only and
                    self.safe_attrs == defs.safe_attrs):
                # safe_attrs handles events attributes itself
                for el in doc.iter(etree.Element):
                    attrib = el.attrib
                    for aname in attrib.keys():
                        if aname.startswith('on'):
                            del attrib[aname]
            doc.rewrite_links(self._remove_javascript_link,
                              resolve_base_href=False)
            if not self.style:
                # If we're deleting style then we don't have to remove JS links
                # from styles, otherwise...
                for el in _find_styled_elements(doc):
                    old = el.get('style')
                    new = _css_javascript_re.sub('', old)
                    new = _css_import_re.sub('', new)
                    if self._has_sneaky_javascript(new):
                        # Something tricky is going on...
                        del el.attrib['style']
                    elif new != old:
                        el.set('style', new)
                for el in list(doc.iter('style')):
                    if el.get('type', '').lower().strip() == 'text/javascript':
                        el.drop_tree()
                        continue
                    old = el.text or ''
                    new = _css_javascript_re.sub('', old)
                    # The imported CSS can do anything; we just can't allow:
                    # (chained on ``new`` so the expression() stripping
                    # above is not thrown away)
                    new = _css_import_re.sub('', new)
                    if self._has_sneaky_javascript(new):
                        # Something tricky is going on...
                        el.text = '/* deleted */'
                    elif new != old:
                        el.text = new
        if self.comments or self.processing_instructions:
            # FIXME: why either?  I feel like there's some obscure reason
            # because you can put PIs in comments...?  But I've already
            # forgotten it
            kill_tags.add(etree.Comment)
        if self.processing_instructions:
            kill_tags.add(etree.ProcessingInstruction)
        if self.style:
            kill_tags.add('style')
            etree.strip_attributes(doc, 'style')
        if self.links:
            kill_tags.add('link')
        elif self.style or self.javascript:
            # We must get rid of included stylesheets if Javascript is not
            # allowed, as you can put Javascript in them
            for el in list(doc.iter('link')):
                if 'stylesheet' in el.get('rel', '').lower():
                    # Note this kills alternate stylesheets as well
                    if not self.allow_element(el):
                        el.drop_tree()
        if self.meta:
            kill_tags.add('meta')
        if self.page_structure:
            remove_tags.update(('head', 'html', 'title'))
        if self.embedded:
            # FIXME: is <layer> really embedded?
            # We should get rid of any <param> tags not inside <applet>;
            # These are not really valid anyway.
            for el in list(doc.iter('param')):
                parent = el.getparent()
                # Climb until an <applet>/<object> ancestor is found or we
                # run out of ancestors.
                while parent is not None and parent.tag not in ('applet', 'object'):
                    parent = parent.getparent()
                if parent is None:
                    el.drop_tree()
            kill_tags.update(('applet',))
            # The alternate contents that are in an iframe are a good fallback:
            remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param'))
        if self.frames:
            # FIXME: ideally we should look at the frame links, but
            # generally frames don't mix properly with an HTML
            # fragment anyway.
            kill_tags.update(defs.frame_tags)
        if self.forms:
            remove_tags.add('form')
            kill_tags.update(('button', 'input', 'select', 'textarea'))
        if self.annoying_tags:
            remove_tags.update(('blink', 'marquee'))

        # Collect first, modify afterwards, so the tree is not changed
        # while it is being iterated.
        _remove = []
        _kill = []
        for el in doc.iter():
            if el.tag in kill_tags:
                if self.allow_element(el):
                    continue
                _kill.append(el)
            elif el.tag in remove_tags:
                if self.allow_element(el):
                    continue
                _remove.append(el)

        if _remove and _remove[0] == doc:
            # We have to drop the parent-most tag, which we can't
            # do.  Instead we'll rewrite it:
            el = _remove.pop(0)
            el.tag = 'div'
            el.attrib.clear()
        elif _kill and _kill[0] == doc:
            # We have to drop the parent-most element, which we can't
            # do.  Instead we'll clear it:
            el = _kill.pop(0)
            if el.tag != 'html':
                el.tag = 'div'
            el.clear()

        _kill.reverse()  # start with innermost tags
        for el in _kill:
            el.drop_tree()
        for el in _remove:
            el.drop_tag()

        if self.remove_unknown_tags:
            if allow_tags:
                raise ValueError(
                    "It does not make sense to pass in both allow_tags and remove_unknown_tags")
            allow_tags = set(defs.tags)
        if allow_tags:
            bad = []
            for el in doc.iter():
                if el.tag not in allow_tags:
                    bad.append(el)
            if bad:
                if bad[0] is doc:
                    # The root cannot be dropped; neutralise it instead.
                    el = bad.pop(0)
                    el.tag = 'div'
                    el.attrib.clear()
                for el in bad:
                    el.drop_tag()
        if self.add_nofollow:
            for el in _find_external_links(doc):
                if not self.allow_follow(el):
                    rel = el.get('rel')
                    if rel:
                        # Cheap substring test first, then a whole-word
                        # test so e.g. "xnofollow" does not count.
                        if ('nofollow' in rel
                                and ' nofollow ' in (' %s ' % rel)):
                            continue
                        rel = '%s nofollow' % rel
                    else:
                        rel = 'nofollow'
                    el.set('rel', rel)

    def allow_follow(self, anchor):
        """
        Override to suppress rel="nofollow" on some anchors.
        """
        return False

    def allow_element(self, el):
        """
        Decide whether an element scheduled for removal may stay.

        Looks up the URL attribute(s) for the element's tag in
        ``_tag_link_attrs``.  Returns false for tags without an entry or
        when an attribute is missing or empty; otherwise every URL must
        pass ``allow_embedded_url``.
        """
        if el.tag not in self._tag_link_attrs:
            return False
        attr = self._tag_link_attrs[el.tag]
        if isinstance(attr, (list, tuple)):
            for one_attr in attr:
                url = el.get(one_attr)
                if not url:
                    return False
                if not self.allow_embedded_url(el, url):
                    return False
            return True
        else:
            url = el.get(attr)
            if not url:
                return False
            return self.allow_embedded_url(el, url)

    def allow_embedded_url(self, el, url):
        """
        Return true when ``url`` may be embedded through element ``el``.

        The tag must be in ``whitelist_tags`` (unless that is None), the
        scheme must be http or https, and the lower-cased host with any
        ``:port`` suffix removed must be in ``host_whitelist``.
        """
        if (self.whitelist_tags is not None
                and el.tag not in self.whitelist_tags):
            return False
        scheme, netloc, path, query, fragment = urlsplit(url)
        netloc = netloc.lower().split(':', 1)[0]
        if scheme not in ('http', 'https'):
            return False
        if netloc in self.host_whitelist:
            return True
        return False

    def kill_conditional_comments(self, doc):
        """
        IE conditional comments basically embed HTML that the parser
        doesn't normally see.  We can't allow anything like that, so
        we'll kill any comments that could be conditional.
        """
        self._kill_elements(
            doc, lambda el: _conditional_comment_re.search(el.text),
            etree.Comment)

    def _kill_elements(self, doc, condition, iterate=None):
        """
        Drop (with their subtrees) all nodes yielded by
        ``doc.iter(iterate)`` for which ``condition(el)`` is true.
        """
        # Collect first so the tree is not modified during iteration.
        bad = []
        for el in doc.iter(iterate):
            if condition(el):
                bad.append(el)
        for el in bad:
            el.drop_tree()

    def _remove_javascript_link(self, link):
        """
        Link-rewriting callback: return ``''`` for links with a dangerous
        scheme and the link unchanged otherwise.
        """
        # links like "j a v a s c r i p t:" might be interpreted in IE
        new = _substitute_whitespace('', link)
        if _is_javascript_scheme(new):
            # FIXME: should this be None to delete?
            return ''
        return link

    # Removes ``/* ... */`` CSS comments (possibly multi-line).
    _substitute_comments = re.compile(r'/\*.*?\*/', re.S).sub

    def _has_sneaky_javascript(self, style):
        """
        Depending on the browser, stuff like ``e x p r e s s i o n(...)``
        can get interpreted, or ``expre/* stuff */ssion(...)``.  This
        checks for attempt to do stuff like this.

        Typically the response will be to kill the entire style; if you
        have just a bit of Javascript in the style another rule will catch
        that and remove only the Javascript from the style; this catches
        more sneaky attempts.
        """
        style = self._substitute_comments('', style)
        style = style.replace('\\', '')
        style = _substitute_whitespace('', style)
        style = style.lower()
        if 'javascript:' in style:
            return True
        if 'expression(' in style:
            return True
        return False

    def clean_html(self, html):
        """
        Clean ``html`` without modifying the caller's object.

        Strings are parsed with ``fromstring``; anything else is
        deep-copied.  The cleaned document goes through
        ``_transform_result`` together with the input type, presumably so
        the result has the same kind as the input.
        """
        result_type = type(html)
        if isinstance(html, basestring):
            doc = fromstring(html)
        else:
            doc = copy.deepcopy(html)
        self(doc)
        return _transform_result(result_type, doc)


# Module-level cleaner with the default options, and its ``clean_html``
# bound method exposed as a plain function.
clean = Cleaner()
clean_html = clean.clean_html
|
||||
|
||||
############################################################
|
||||
## Autolinking
|
||||
############################################################
|
||||
|
||||
# Patterns recognised as links by default.  Each one must define the
# named groups ``body`` (the anchor text) and ``host``.
_link_regexes = [
    re.compile(
        r'(?P<body>https?://(?P<host>[a-z0-9._-]+)(?:/[/\-_.,a-z0-9%&?;=~]*)?(?:\([/\-_.,a-z0-9%&?;=~]*\))?)',
        flags=re.I),
    # This is conservative, but autolinking can be a bit conservative:
    re.compile(
        r'mailto:(?P<body>[a-z0-9._-]+@(?P<host>[a-z0-9_._]+[a-z]))',
        flags=re.I),
]

# Text inside these elements is never autolinked.
_avoid_elements = ['textarea', 'pre', 'code', 'head', 'select', 'a']

# Hosts matching any of these patterns are never autolinked.
_avoid_hosts = [
    re.compile(r'^localhost', flags=re.I),
    re.compile(r'\bexample\.(?:com|org|net)$', flags=re.I),
    re.compile(r'^127\.0\.0\.1$'),
]

# Elements carrying one of these CSS classes are never autolinked.
_avoid_classes = ['nolink']
|
||||
|
||||
def autolink(el, link_regexes=_link_regexes,
             avoid_elements=_avoid_elements,
             avoid_hosts=_avoid_hosts,
             avoid_classes=_avoid_classes):
    """
    Turn any URLs into links.

    Text matching one of ``link_regexes`` (by default http(s) and mailto
    links) is wrapped in ``<a>`` elements, recursively for the whole
    subtree of ``el``.

    Nothing is linked inside an element whose tag is in
    ``avoid_elements`` or that carries a class from ``avoid_classes``,
    and matches whose host satisfies one of the ``avoid_hosts``
    patterns (by default localhost, example.com/.org/.net and
    127.0.0.1) are left as plain text.

    Only the contents of ``el`` are processed; its own tail is not.
    The element is modified in place and nothing is returned.
    """
    if el.tag in avoid_elements:
        return
    css_classes = el.get('class')
    if css_classes:
        present = css_classes.split()
        if any(name in present for name in avoid_classes):
            return
    # Iterate over a snapshot: new anchors are inserted while looping.
    for child in list(el):
        autolink(child, link_regexes=link_regexes,
                 avoid_elements=avoid_elements,
                 avoid_hosts=avoid_hosts,
                 avoid_classes=avoid_classes)
        if not child.tail:
            continue
        remaining, new_anchors = _link_text(
            child.tail, link_regexes, avoid_hosts, factory=el.makeelement)
        if not new_anchors:
            continue
        child.tail = remaining
        insert_at = el.index(child) + 1
        el[insert_at:insert_at] = new_anchors
    if el.text:
        remaining, new_anchors = _link_text(
            el.text, link_regexes, avoid_hosts, factory=el.makeelement)
        if new_anchors:
            el.text = remaining
            el[:0] = new_anchors
|
||||
|
||||
def _link_text(text, link_regexes, avoid_hosts, factory):
    """
    Split ``text`` into leading plain text and a list of anchor elements.

    On every pass each regex is searched from the start of the remaining
    text (skipping matches whose ``host`` group matches one of
    ``avoid_hosts``) and the earliest surviving match becomes an anchor
    created with ``factory('a')``.  Text between links is stored as the
    tail of the preceding anchor.  A single trailing ``.`` or ``,`` is
    kept out of the href and the anchor text.

    Returns ``(leading_text, anchors)``.
    """
    leading_text = ''
    anchors = []
    search_from = 0
    while True:
        earliest = None
        for regex in link_regexes:
            pos = search_from
            while True:
                found = regex.search(text, pos=pos)
                if found is None:
                    break
                host = found.group('host')
                if any(avoid.search(host) for avoid in avoid_hosts):
                    # Unwanted host: retry after this match.
                    pos = found.end()
                    continue
                break
            if found is None:
                continue
            if earliest is None or found.start() < earliest.start():
                earliest = found
        if earliest is None:
            # No more matches: whatever is left is plain text.
            if anchors:
                assert not anchors[-1].tail
                anchors[-1].tail = text
            else:
                assert not leading_text
                leading_text = text
            break
        href = earliest.group(0)
        cut = earliest.end()
        if href.endswith(('.', ',')):
            # These punctuation marks shouldn't end a link
            cut -= 1
            href = href[:-1]
        before = text[:earliest.start()]
        if anchors:
            assert not anchors[-1].tail
            anchors[-1].tail = before
        else:
            assert not leading_text
            leading_text = before
        anchor = factory('a')
        anchor.set('href', href)
        label = earliest.group('body') or href
        if label.endswith(('.', ',')):
            label = label[:-1]
        anchor.text = label
        anchors.append(anchor)
        text = text[cut:]
    return leading_text, anchors
|
||||
|
||||
def autolink_html(html, *args, **kw):
    # Work on a private document: parse strings, deep-copy anything else.
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    autolink(doc, *args, **kw)
    return _transform_result(result_type, doc)


# Same options as autolink(), so share its documentation.
autolink_html.__doc__ = autolink.__doc__
|
||||
|
||||
############################################################
|
||||
## Word wrapping
|
||||
############################################################
|
||||
|
||||
# Default tags and CSS classes whose text is never broken up.
_avoid_word_break_elements = ['pre', 'textarea', 'code']
_avoid_word_break_classes = ['nobreak']


def word_break(el, max_width=40,
               avoid_elements=_avoid_word_break_elements,
               avoid_classes=_avoid_word_break_classes,
               break_character=unichr(0x200b)):
    """
    Breaks any long words found in the body of the text (not attributes).

    Doesn't affect any of the tags in avoid_elements (by default
    ``<pre>``, ``<textarea>`` and ``<code>``) nor elements carrying one
    of the classes in avoid_classes (by default ``nobreak``).

    Breaks words by inserting &#8203;, which is a unicode character
    for Zero Width Space character.  This generally takes up no space
    in rendering, but does copy as a space, and in monospace contexts
    usually takes up space.

    The element is modified in place; its own tail is left alone.

    See http://www.cs.tut.fi/~jkorpela/html/nobr.html for a discussion
    """
    # Character suggestion of &#8203; comes from:
    #   http://www.cs.tut.fi/~jkorpela/html/nobr.html
    # Honour the caller's avoid_elements here as well as in the recursion
    # below (the module default used to be tested unconditionally).
    if el.tag in avoid_elements:
        return
    class_name = el.get('class')
    if class_name:
        class_names = class_name.split()
        if any(avoid in class_names for avoid in avoid_classes):
            return
    if el.text:
        el.text = _break_text(el.text, max_width, break_character)
    for child in el:
        word_break(child, max_width=max_width,
                   avoid_elements=avoid_elements,
                   avoid_classes=avoid_classes,
                   break_character=break_character)
        if child.tail:
            child.tail = _break_text(child.tail, max_width, break_character)
|
||||
|
||||
def word_break_html(html, *args, **kw):
    """
    Apply ``word_break`` to ``html`` and return the result.

    Extra arguments are passed on to ``word_break``.  Like
    ``autolink_html``, a string is parsed with ``fromstring`` while any
    other input (a parsed document) is deep-copied, so the caller's
    object is never modified.  The outcome goes through
    ``_transform_result`` together with the input type.
    """
    result_type = type(html)
    if isinstance(html, basestring):
        doc = fromstring(html)
    else:
        doc = copy.deepcopy(html)
    word_break(doc, *args, **kw)
    return _transform_result(result_type, doc)
|
||||
|
||||
# A "word" is a maximal run of non-whitespace characters.
_break_word_re = re.compile(r'\S+')


def _break_text(text, max_width, break_character):
    """
    Return ``text`` with ``break_character`` inserted into every word
    longer than ``max_width``; whitespace is preserved as it is.

    Each word is rewritten where it stands, so a long word that also
    occurs as part of another word no longer corrupts that other word
    (as the earlier global ``str.replace`` did).
    """
    def break_long(match):
        word = match.group(0)
        if len(word) > max_width:
            return _insert_break(word, max_width, break_character)
        return word
    return _break_word_re.sub(break_long, text)


# Anything that is not an ASCII letter is a preferred place to break.
_break_prefer_re = re.compile(r'[^a-z]', re.I)


def _insert_break(word, width, break_character):
    """
    Split ``word`` into chunks of at most ``width`` characters joined by
    ``break_character``.

    A chunk is cut right after its last non-letter character when that
    position lies within the last 10 characters of the chunk.

    Raises ``ValueError`` when ``width`` is smaller than 1, because no
    progress could be made and the loop would never end.
    """
    if width < 1:
        raise ValueError("width must be at least 1, got %r" % (width,))
    chunks = []
    while len(word) > width:
        start = word[:width]
        breaks = list(_break_prefer_re.finditer(start))
        if breaks:
            last_break = breaks[-1]
            # Only walk back up to 10 characters to find a nice break:
            if last_break.end() > width - 10:
                # FIXME: should the break character be at the end of the
                # chunk, or the beginning of the next chunk?
                start = word[:last_break.end()]
        chunks.append(start)
        chunks.append(break_character)
        word = word[len(start):]
    chunks.append(word)
    return ''.join(chunks)
|
||||
|
||||
137
lib/python3.5/site-packages/lxml/html/defs.py
Normal file
137
lib/python3.5/site-packages/lxml/html/defs.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
# FIXME: this should all be confirmed against what a DTD says
|
||||
# (probably in a test; this may not match the DTD exactly, but we
|
||||
# should document just how it differs).
|
||||
|
||||
# Data taken from http://www.w3.org/TR/html401/index/elements.html
|
||||
# and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
|
||||
# for html5_tags.
|
||||
|
||||
try:
|
||||
frozenset
|
||||
except NameError:
|
||||
from sets import Set as frozenset
|
||||
|
||||
|
||||
# Elements that never have content.
empty_tags = frozenset(
    'area base basefont br col frame hr img input isindex link meta '
    'param'.split())

deprecated_tags = frozenset(
    'applet basefont center dir font isindex menu s strike u'.split())

# Attributes whose value is a link.
# archive actually takes a space-separated list of URIs
link_attrs = frozenset(
    'action archive background cite classid codebase data href longdesc '
    'profile src usemap '
    # Not standard:
    'dynsrc lowsrc'.split())

# Not in the HTML 4 spec:
# onerror, onresize
event_attrs = frozenset(
    'onblur onchange onclick ondblclick onerror onfocus onkeydown '
    'onkeypress onkeyup onload onmousedown onmousemove onmouseout '
    'onmouseover onmouseup onreset onresize onselect onsubmit '
    'onunload'.split())

safe_attrs = frozenset(
    'abbr accept accept-charset accesskey action align alt axis border '
    'cellpadding cellspacing char charoff charset checked cite class '
    'clear cols colspan color compact coords datetime dir disabled '
    'enctype for frame headers height href hreflang hspace id ismap '
    'label lang longdesc maxlength media method multiple name nohref '
    'noshade nowrap prompt readonly rel rev rows rowspan rules scope '
    'selected shape size span src start summary tabindex target title '
    'type usemap valign value vspace width'.split())

# From http://htmlhelp.com/reference/html40/olist.html
top_level_tags = frozenset('html head body frameset'.split())

head_tags = frozenset(
    'base isindex link meta script style title'.split())

general_block_tags = frozenset(
    'address blockquote center del div h1 h2 h3 h4 h5 h6 hr ins isindex '
    'noscript p pre'.split())

list_tags = frozenset('dir dl dt dd li menu ol ul'.split())

table_tags = frozenset(
    'table caption colgroup col thead tfoot tbody tr td th'.split())

# just this one from
# http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm
block_tags = general_block_tags | list_tags | table_tags | frozenset(
    # Partial form tags
    'fieldset form legend optgroup option'.split())

form_tags = frozenset(
    'form button fieldset legend input label select optgroup option '
    'textarea'.split())

special_inline_tags = frozenset(
    'a applet basefont bdo br embed font iframe img map area object '
    'param q script span sub sup'.split())

phrase_tags = frozenset(
    'abbr acronym cite code del dfn em ins kbd samp strong var'.split())

font_style_tags = frozenset('b big i s small strike tt u'.split())

frame_tags = frozenset('frameset frame noframes'.split())

html5_tags = frozenset(
    'article aside audio canvas command datalist details embed '
    'figcaption figure footer header hgroup keygen mark math meter nav '
    'output progress rp rt ruby section source summary svg time track '
    'video wbr'.split())

# These tags aren't standard
nonstandard_tags = frozenset('blink marquee'.split())

# Every tag name known to this module.
tags = (
    top_level_tags
    | head_tags
    | general_block_tags
    | list_tags
    | table_tags
    | form_tags
    | special_inline_tags
    | phrase_tags
    | font_style_tags
    | nonstandard_tags
    | html5_tags
)
|
||||
881
lib/python3.5/site-packages/lxml/html/diff.py
Normal file
881
lib/python3.5/site-packages/lxml/html/diff.py
Normal file
|
|
@ -0,0 +1,881 @@
|
|||
import difflib
|
||||
from lxml import etree
|
||||
from lxml.html import fragment_fromstring
|
||||
import re
|
||||
|
||||
__all__ = ['html_annotate', 'htmldiff']
|
||||
|
||||
try:
|
||||
from html import escape as html_escape
|
||||
except ImportError:
|
||||
from cgi import escape as html_escape
|
||||
try:
|
||||
_unicode = unicode
|
||||
except NameError:
|
||||
# Python 3
|
||||
_unicode = str
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
# Python 3
|
||||
basestring = str
|
||||
|
||||
############################################################
|
||||
## Annotation
|
||||
############################################################
|
||||
|
||||
def default_markup(text, version):
    """
    Wrap ``text`` in a ``<span>`` whose ``title`` is the escaped
    ``version``.  ``text`` is inserted as it is, without escaping.
    """
    title = html_escape(_unicode(version), 1)
    return '<span title="%s">%s</span>' % (title, text)
|
||||
|
||||
def html_annotate(doclist, markup=default_markup):
    """
    doclist should be ordered from oldest to newest, like::

        >>> version1 = 'Hello World'
        >>> version2 = 'Goodbye World'
        >>> print(html_annotate([(version1, 'version 1'),
        ...                      (version2, 'version 2')]))
        <span title="version 2">Goodbye</span> <span title="version 1">World</span>

    The documents must be *fragments* (str/UTF8 or unicode), not
    complete documents

    The markup argument is a function to markup the spans of words.
    This function is called like markup('Hello', 'version 2'), and
    returns HTML.  The first argument is text and never includes any
    markup.  The default uses a span with a title:

        >>> print(default_markup('Some Text', 'by Joe'))
        <span title="by Joe">Some Text</span>
    """
    # Strategy: tokenize every version (a token is a word plus its
    # attached markup), then walk the versions pairwise from oldest to
    # newest so that each token keeps the annotation of the version in
    # which it first appeared.
    versions = [tokenize_annotated(doc, version)
                for doc, version in doclist]
    for older, newer in zip(versions, versions[1:]):
        html_annotate_merge_annotations(older, newer)
    latest = versions[-1]

    # Adjacent spans of text with the same annotation are combined
    # before the markup is added.
    merged = compress_tokens(latest)
    chunks = markup_serialize_tokens(merged, markup)
    return ''.join(chunks).strip()
|
||||
|
||||
def tokenize_annotated(doc, annotation):
    """Tokenize ``doc`` (hrefs not included) and set the ``annotation``
    attribute of every resulting token to ``annotation``.
    """
    annotated = tokenize(doc, include_hrefs=False)
    for item in annotated:
        item.annotation = annotation
    return annotated
|
||||
|
||||
def html_annotate_merge_annotations(tokens_old, tokens_new):
    """Carry annotations over from ``tokens_old`` to ``tokens_new`` for
    every run of tokens the sequence matcher reports as equal, i.e. for
    tokens that already existed in the old document.
    """
    matcher = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new)
    for opcode, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
        if opcode != 'equal':
            continue
        copy_annotations(tokens_old[old_lo:old_hi],
                         tokens_new[new_lo:new_hi])
|
||||
|
||||
def copy_annotations(src, dest):
    """
    Give each token in ``dest`` the annotation of the token at the same
    position in ``src``.  Both sequences must have the same length.
    """
    assert len(src) == len(dest)
    for original, target in zip(src, dest):
        target.annotation = original.annotation
|
||||
|
||||
def compress_tokens(tokens):
    """
    Combine adjacent tokens when there is no HTML between the tokens,
    and they share an annotation

    Returns a new list; an empty input gives an empty list (it used to
    raise ``IndexError``).
    """
    result = []
    for tok in tokens:
        # The first token has no predecessor and is always kept.
        if (result and
            not result[-1].post_tags and
            not tok.pre_tags and
            result[-1].annotation == tok.annotation):
            compress_merge_back(result, tok)
        else:
            result.append(tok)
    return result
|
||||
|
||||
def compress_merge_back(tokens, tok):
    """ Merge tok into the last element of tokens, modifying the list
    in-place.  Only plain ``token`` instances are merged; if either side
    is a subclass, tok is simply appended. """
    last = tokens[-1]
    if type(last) is not token or type(tok) is not token:
        tokens.append(tok)
        return
    # Rebuild the text: previous word, its whitespace, then the new word.
    combined = _unicode(last) + (last.trailing_whitespace or '') + tok
    replacement = token(combined,
                        pre_tags=last.pre_tags,
                        post_tags=tok.post_tags,
                        trailing_whitespace=tok.trailing_whitespace)
    replacement.annotation = last.annotation
    tokens[-1] = replacement
|
||||
|
||||
def markup_serialize_tokens(tokens, markup_func):
    """
    Generate the text chunks for a list of tokens.  For each token the
    pre_tags are emitted, then ``markup_func(html, annotation)`` with the
    token's trailing whitespace appended, then the post_tags.
    """
    for tok in tokens:
        for opening in tok.pre_tags:
            yield opening
        marked_up = markup_func(tok.html(), tok.annotation)
        whitespace = tok.trailing_whitespace
        yield (marked_up + whitespace) if whitespace else marked_up
        for closing in tok.post_tags:
            yield closing
|
||||
|
||||
|
||||
############################################################
|
||||
## HTML Diffs
|
||||
############################################################
|
||||
|
||||
def htmldiff(old_html, new_html):
    ## FIXME: this should take parsed documents too, and use their body
    ## or other content.
    """ Diff two HTML *fragments* (not complete documents, i.e. no
    <html> tag) and return HTML with <ins> and <del> tags wrapped
    around the text that changed.

    Only the words are diffed.  Markup is taken from new_html, with a
    best-effort attempt at keeping markup from old_html.  <img> tags
    are treated like words, and the href of <a> tags is shown inside
    the tag when it changed.
    """
    chunks = htmldiff_tokens(tokenize(old_html), tokenize(new_html))
    joined = ''.join(chunks).strip()
    return fixup_ins_del_tags(joined)
|
||||
|
||||
def htmldiff_tokens(html1_tokens, html2_tokens):
    """ Diff two token lists and return a list of text chunks (not
    tokens) for the combined document.
    """
    # difflib does the actual comparison on the tokens.  The output is
    # then assembled mostly from the new document's markup; markup from
    # the old document is kept on a best-effort basis only.
    matcher = InsensitiveSequenceMatcher(a=html1_tokens, b=html2_tokens)
    chunks = []
    for op, old_lo, old_hi, new_lo, new_hi in matcher.get_opcodes():
        if op == 'equal':
            chunks.extend(expand_tokens(html2_tokens[new_lo:new_hi], equal=True))
            continue
        if op in ('insert', 'replace'):
            merge_insert(expand_tokens(html2_tokens[new_lo:new_hi]), chunks)
        if op in ('delete', 'replace'):
            merge_delete(expand_tokens(html1_tokens[old_lo:old_hi]), chunks)
    # Deletes were recorded as DEL_START/DEL_END markers so far; writing
    # <del> directly could have produced an invalid document.  Resolve
    # the markers now that the whole document is assembled.
    return cleanup_delete(chunks)
|
||||
|
||||
def expand_tokens(tokens, equal=False):
    """Generate the text chunks (pre_tags, word, post_tags) for tokens.

    When ``equal`` is true, tokens flagged ``hide_when_equal`` contribute
    only their tags, not their own text.
    """
    for tok in tokens:
        for opening in tok.pre_tags:
            yield opening
        if not (equal and tok.hide_when_equal):
            if tok.trailing_whitespace:
                yield tok.html() + tok.trailing_whitespace
            else:
                yield tok.html()
        for closing in tok.post_tags:
            yield closing
|
||||
|
||||
def merge_insert(ins_chunks, doc):
    """ Append <ins>ins_chunks</ins> to doc, the list of text chunks
    handled so far (modified in place). """
    # Unbalanced tags are kept (their partners are assumed to live
    # elsewhere in the document), but <ins> only wraps the balanced part.
    leading, body, trailing = split_unbalanced(ins_chunks)
    doc.extend(leading)
    if doc and not doc[-1].endswith(' '):
        # The chunk before the insert did not end with a space; add one.
        doc[-1] += ' '
    doc.append('<ins>')
    if body and body[-1].endswith(' '):
        # Move one trailing space outside of </ins>.
        body[-1] = body[-1][:-1]
    doc.extend(body)
    doc.append('</ins> ')
    doc.extend(trailing)
|
||||
|
||||
# These are sentinels to represent the start and end of a <del>
|
||||
# segment, until we do the cleanup phase to turn them into proper
|
||||
# markup:
|
||||
class DEL_START:
    """Marker placed in the chunk list where a pending delete begins."""


class DEL_END:
    """Marker placed in the chunk list where a pending delete ends."""


class NoDeletes(Exception):
    """ Raised when the document no longer contains any pending deletes
    (DEL_START/DEL_END) """
|
||||
|
||||
def merge_delete(del_chunks, doc):
    """ Append del_chunks to doc (a list of text chunks, modified in
    place), bracketed by the DEL_START and DEL_END markers.
    cleanup_delete later turns these markers into <del> tags."""
    doc.append(DEL_START)
    doc.extend(del_chunks)
    doc.append(DEL_END)
|
||||
|
||||
def cleanup_delete(chunks):
    """ Replace every DEL_START/DEL_END pair in chunks with <del></del>
    and return the resulting chunk list.

    Unbalanced tags inside a deleted span are dropped from the span;
    where possible the span is shifted into adjacent matching tags so
    it lands near its original location (e.g. a delete that looks like
    (DEL_START, 'Text</div>', DEL_END) moves into the preceding <div>)."""
    while True:
        # Split into before-DEL_START, inside, and after-DEL_END.
        try:
            before, deleted, after = split_delete(chunks)
        except NoDeletes:
            # No markers left: the whole document is cleaned up.
            return chunks
        # The deleted span may not be balanced markup; separate the
        # dangling open/close tags from the balanced core.
        opening, inner, closing = split_unbalanced(deleted)
        # Shift the insertion point forward/backward to absorb them.
        locate_unbalanced_start(opening, before, after)
        locate_unbalanced_end(closing, before, after)
        if before and not before[-1].endswith(' '):
            # The preceding chunk had no trailing space; add one.
            before[-1] += ' '
        before.append('<del>')
        if inner and inner[-1].endswith(' '):
            # Move one trailing space outside of </del>.
            inner[-1] = inner[-1][:-1]
        before.extend(inner)
        before.append('</del> ')
        before.extend(after)
        chunks = before
|
||||
|
||||
def split_unbalanced(chunks):
    """Return (unbalanced_start, balanced, unbalanced_end), each a list
    of text and tag chunks.

    unbalanced_start holds the tags opened but not closed within
    chunks; unbalanced_end holds the tags closed but never opened.
    Pulling these out may reorder chunks relative to the input."""
    opened_unclosed = []
    closed_unopened = []
    open_stack = []  # entries: (tag name, slot index in kept, tag text)
    kept = []
    for chunk in chunks:
        if not chunk.startswith('<'):
            kept.append(chunk)
            continue
        is_closing = chunk[1] == '/'
        tag_name = chunk.split()[0].strip('<>/')
        if tag_name in empty_tags:
            kept.append(chunk)
            continue
        if not is_closing:
            # Reserve a slot; it is filled only if the tag gets closed.
            open_stack.append((tag_name, len(kept), chunk))
            kept.append(None)
        elif open_stack and open_stack[-1][0] == tag_name:
            kept.append(chunk)
            _, slot, opening = open_stack.pop()
            kept[slot] = opening
        else:
            # Close tag that does not match the innermost open tag:
            # everything still open becomes unbalanced.
            opened_unclosed.extend([entry[2] for entry in open_stack])
            open_stack = []
            closed_unopened.append(chunk)
    opened_unclosed.extend([entry[2] for entry in open_stack])
    kept = [piece for piece in kept if piece is not None]
    return opened_unclosed, kept, closed_unopened
|
||||
|
||||
def split_delete(chunks):
    """ Split chunks around the first DEL_START/DEL_END pair and return
    (before_DEL_START, inside, after_DEL_END).  The last part may still
    contain further DEL_STARTs.  Raises NoDeletes when chunks holds no
    DEL_START. """
    if DEL_START not in chunks:
        raise NoDeletes
    begin = chunks.index(DEL_START)
    finish = chunks.index(DEL_END)
    return chunks[:begin], chunks[begin + 1:finish], chunks[finish + 1:]
|
||||
|
||||
def locate_unbalanced_start(unbalanced_start, pre_delete, post_delete):
    """ pre_delete and post_delete together mark a split point in the
    document.  This moves that point forward (popping from post_delete,
    pushing onto pre_delete) for as long as the next chunk is an opening
    tag matching the next entry in unbalanced_start.  All three lists
    are modified in place.

    As an example::

        >>> unbalanced_start = ['<div>']
        >>> doc = ['<p>', 'Text', '</p>', '<div>', 'More Text', '</div>']
        >>> pre, post = doc[:3], doc[3:]
        >>> pre, post
        (['<p>', 'Text', '</p>'], ['<div>', 'More Text', '</div>'])
        >>> locate_unbalanced_start(unbalanced_start, pre, post)
        >>> pre, post
        (['<p>', 'Text', '</p>', '<div>'], ['More Text', '</div>'])

    The dangling <div> is effectively replaced by the <div> already in
    the document.  When no match is found, the remaining entries in
    unbalanced_start are simply left unused.
    """
    while unbalanced_start:
        wanted_name = unbalanced_start[0].split()[0].strip('<>')
        if not post_delete:
            break
        candidate = post_delete[0]
        if candidate is DEL_START or not candidate.startswith('<'):
            # A word (or another pending delete): cannot move past it.
            break
        if candidate[1] == '/':
            # A closing tag: stop here.
            break
        candidate_name = candidate.split()[0].strip('<>')
        if candidate_name == 'ins':
            # Never move into an insert.
            break
        assert candidate_name != 'del', (
            "Unexpected delete tag: %r" % candidate)
        if candidate_name != wanted_name:
            # The next tag is a different one.
            break
        unbalanced_start.pop(0)
        pre_delete.append(post_delete.pop(0))
|
||||
|
||||
def locate_unbalanced_end(unbalanced_end, pre_delete, post_delete):
    """ Counterpart of locate_unbalanced_start for end tags: moves the
    split point backward (popping from pre_delete, pushing onto the
    front of post_delete) while the preceding chunk is a closing tag
    matching the last entry of unbalanced_end. """
    while unbalanced_end:
        wanted_name = unbalanced_end[-1].split()[0].strip('<>/')
        if not pre_delete:
            break
        candidate = pre_delete[-1]
        if candidate is DEL_END or not candidate.startswith('</'):
            # A word, a start tag, or the end of another pending delete.
            break
        candidate_name = candidate.split()[0].strip('<>/')
        if candidate_name in ('ins', 'del'):
            # Never move into an insert or delete.
            break
        if candidate_name != wanted_name:
            # The preceding closing tag is a different one.
            break
        unbalanced_end.pop()
        post_delete.insert(0, pre_delete.pop())
|
||||
|
||||
class token(_unicode):
    """ A diffable token: usually a word shown to the user.  Adjacent
    opening tags are stored in pre_tags and following closing tags in
    post_tags (empty tags next to a word can put close tags in pre_tags
    or open tags in post_tags).

    trailing_whitespace records the whitespace that followed the word.
    It is kept outside the string value so that it does not take part
    in comparisons between tokens."""

    # When true, the token is left out of the displayed diff if it did
    # not change:
    hide_when_equal = False

    def __new__(cls, text, pre_tags=None, post_tags=None, trailing_whitespace=""):
        self = _unicode.__new__(cls, text)
        self.pre_tags = [] if pre_tags is None else pre_tags
        self.post_tags = [] if post_tags is None else post_tags
        self.trailing_whitespace = trailing_whitespace
        return self

    def __repr__(self):
        return 'token(%s, %r, %r, %r)' % (
            _unicode.__repr__(self), self.pre_tags,
            self.post_tags, self.trailing_whitespace)

    def html(self):
        """Return the token's text as a plain string."""
        return _unicode(self)
|
||||
|
||||
class tag_token(token):

    """ Represents a token that is actually a tag.  Currently this is just
    the <img> tag, which takes up visible space just like a word but
    is only represented in a document by a tag.

    The string value used for comparison is "<tag>: <data>"; html()
    returns html_repr, the serialized tag itself. """

    def __new__(cls, tag, data, html_repr, pre_tags=None,
                post_tags=None, trailing_whitespace=""):
        # Build the comparison text from the tag name.  (This used to
        # format the builtin ``type`` by mistake.)
        obj = token.__new__(cls, "%s: %s" % (tag, data),
                            pre_tags=pre_tags,
                            post_tags=post_tags,
                            trailing_whitespace=trailing_whitespace)
        obj.tag = tag
        obj.data = data
        obj.html_repr = html_repr
        return obj

    def __repr__(self):
        # Labels are listed in the same order as the values below.
        return 'tag_token(%s, %s, html_repr=%s, pre_tags=%r, post_tags=%r, trailing_whitespace=%r)' % (
            self.tag,
            self.data,
            self.html_repr,
            self.pre_tags,
            self.post_tags,
            self.trailing_whitespace)

    def html(self):
        return self.html_repr
|
||||
|
||||
class href_token(token):

    """ The href of an anchor tag, as a diffable token.  Unlike ordinary
    words it is only displayed when it changed. """

    hide_when_equal = True

    def html(self):
        """Return the href rendered as visible ' Link: ...' text."""
        return ' Link: %s' % self
|
||||
|
||||
def tokenize(html, include_hrefs=True):
    """
    Parse the given HTML and return token objects (words with their
    attached tags).

    ``html`` may be an lxml element, which is used as-is, or markup,
    which is parsed with ``parse_html(html, cleanup=True)`` (this strips
    <body> wrappers and <ins>/<del> tags).

    If include_hrefs is true, the href attribute of <a> tags is
    included as a special kind of diffable token."""
    body_el = html if etree.iselement(html) else parse_html(html, cleanup=True)
    # Flatten into text chunks (tags, words, end tags), then regroup
    # the chunks into token objects.
    flat = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs)
    return fixup_chunks(flat)
|
||||
|
||||
def parse_html(html, cleanup=True):
    """
    Parse an HTML fragment and return an lxml element.  The fragment is
    parsed with ``create_parent=True``, so the returned element is a
    wrapper that was not part of the original markup.

    If cleanup is true, the markup is first passed through cleanup_html
    (drops <body> wrappers and <ins>/<del> tags).
    """
    source = cleanup_html(html) if cleanup else html
    return fragment_fromstring(source, create_parent=True)
|
||||
|
||||
_body_re = re.compile(r'<body.*?>', re.I|re.S)
_end_body_re = re.compile(r'</body.*?>', re.I|re.S)
_ins_del_re = re.compile(r'</?(ins|del).*?>', re.I|re.S)


def cleanup_html(html):
    """ 'Clean' the HTML: if a <body> start tag is present, everything up
    to and including it is dropped; if a </body> end tag follows,
    everything from it onward is dropped.  All <ins> and <del> tags
    (start and end) are then removed, keeping their contents. """
    body_open = _body_re.search(html)
    if body_open:
        html = html[body_open.end():]
    body_close = _end_body_re.search(html)
    if body_close:
        html = html[:body_close.start()]
    return _ins_del_re.sub('', html)
|
||||
|
||||
|
||||
end_whitespace_re = re.compile(r'[ \t\n\r]$')
|
||||
|
||||
def split_trailing_whitespace(word):
    """
    Split a word into (text, trailing whitespace); for example
    'test\\n\\n' becomes ('test', '\\n\\n').
    """
    core = word.rstrip()
    return core, word[len(core):]
|
||||
|
||||
|
||||
def fixup_chunks(chunks):
    """
    Turn a sequence of chunks (words, start tags, end tags, and
    ('img', src, tag) / ('href', url) tuples) into a list of tokens
    with the surrounding tags attached as pre_tags / post_tags.
    """
    pending_tags = []   # tags seen since the last word
    last_word = None
    tokens = []
    for chunk in chunks:
        if isinstance(chunk, tuple):
            kind = chunk[0]
            if kind == 'img':
                html_repr, whitespace = split_trailing_whitespace(chunk[2])
                last_word = tag_token('img', chunk[1], html_repr=html_repr,
                                      pre_tags=pending_tags,
                                      trailing_whitespace=whitespace)
            elif kind == 'href':
                last_word = href_token(chunk[1], pre_tags=pending_tags,
                                       trailing_whitespace=" ")
            else:
                # Other tuples are ignored.
                continue
            pending_tags = []
            tokens.append(last_word)
            continue

        if is_word(chunk):
            text, whitespace = split_trailing_whitespace(chunk)
            last_word = token(text, pre_tags=pending_tags,
                              trailing_whitespace=whitespace)
            pending_tags = []
            tokens.append(last_word)
        elif is_start_tag(chunk):
            pending_tags.append(chunk)
        elif is_end_tag(chunk):
            if pending_tags:
                # Tags are already queued for the next word; keep order.
                pending_tags.append(chunk)
            else:
                assert last_word, (
                    "Weird state, cur_word=%r, result=%r, chunks=%r of %r"
                    % (last_word, tokens, chunk, chunks))
                last_word.post_tags.append(chunk)
        else:
            assert(0)

    if not tokens:
        # No words at all: carry the tags on a single empty token.
        return [token('', pre_tags=pending_tags)]
    tokens[-1].post_tags.extend(pending_tags)
    return tokens
|
||||
|
||||
|
||||
# All the tags in HTML that don't require end tags:
|
||||
empty_tags = (
    'param',
    'img',
    'area',
    'br',
    'basefont',
    'input',
    'base',
    'meta',
    'link',
    'col',
)

# Block-level elements:
block_level_tags = (
    'address', 'blockquote', 'center', 'dir', 'div', 'dl',
    'fieldset', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'hr', 'isindex', 'menu', 'noframes', 'noscript',
    'ol', 'p', 'pre', 'table', 'ul',
)

# Elements that are also treated as block-level containers:
block_level_container_tags = (
    'dd', 'dt', 'frameset', 'li', 'tbody',
    'td', 'tfoot', 'th', 'thead', 'tr',
)
|
||||
|
||||
|
||||
def flatten_el(el, include_hrefs, skip_tag=False):
    """ Generate the text chunks for the lxml element el: one chunk per
    start tag, per word, and per end tag.  <img> elements are emitted
    as ('img', src, start_tag) tuples, and, when include_hrefs is true,
    <a href> elements emit an ('href', url) tuple before their end tag.

    If skip_tag is true, the element's own start tag, end tag and tail
    are not emitted (just its contents)."""
    if not skip_tag:
        opening = start_tag(el)
        if el.tag == 'img':
            yield ('img', el.get('src'), opening)
        else:
            yield opening
    if el.tag in empty_tags and not el.text and not len(el) and not el.tail:
        # Empty element with nothing inside or after it: no end tag.
        return
    for word in split_words(el.text):
        yield html_escape(word)
    for child in el:
        for piece in flatten_el(child, include_hrefs=include_hrefs):
            yield piece
    if el.tag == 'a' and el.get('href') and include_hrefs:
        yield ('href', el.get('href'))
    if not skip_tag:
        yield end_tag(el)
        for word in split_words(el.tail):
            yield html_escape(word)
|
||||
|
||||
split_words_re = re.compile(r'\S+(?:\s+|$)', re.U)


def split_words(text):
    """ Split text into a list of words, each keeping the whitespace
    that follows it.  Returns [] for None, empty or all-whitespace
    text. """
    if text and text.strip():
        return split_words_re.findall(text)
    return []
|
||||
|
||||
start_whitespace_re = re.compile(r'^[ \t\n\r]')
|
||||
|
||||
def start_tag(el):
    """
    Return the text of the start tag for el, with all of its attributes
    (values escaped with html_escape, quotes included).
    """
    attributes = [' %s="%s"' % (key, html_escape(val, True))
                  for key, val in el.attrib.items()]
    return '<%s%s>' % (el.tag, ''.join(attributes))
|
||||
|
||||
def end_tag(el):
    """ Return the text of the end tag for el, followed by a single
    space when the element's tail starts with whitespace. """
    has_leading_space = el.tail and start_whitespace_re.search(el.tail)
    extra = ' ' if has_leading_space else ''
    return '</%s>%s' % (el.tag, extra)
|
||||
|
||||
def is_word(tok):
    """True when the chunk is text, i.e. does not start with '<'."""
    looks_like_tag = tok.startswith('<')
    return not looks_like_tag
|
||||
|
||||
def is_end_tag(tok):
    """True when the chunk starts with '</'."""
    closing_prefix = '</'
    return tok.startswith(closing_prefix)
|
||||
|
||||
def is_start_tag(tok):
    """True when the chunk starts with '<' but not with '</'."""
    if not tok.startswith('<'):
        return False
    return not tok.startswith('</')
|
||||
|
||||
def fixup_ins_del_tags(html):
    """ Given an html string, push any <ins> or <del> tags that wrap
    block-level elements inside those elements, e.g. turn
    <ins><p>word</p></ins> into <p><ins>word</ins></p>.  Returns the
    re-serialized html string. """
    fragment = parse_html(html, cleanup=False)
    _fixup_ins_del_tags(fragment)
    return serialize_html_fragment(fragment, skip_outer=True)
|
||||
|
||||
def serialize_html_fragment(el, skip_outer=False):
    """ Serialize a single lxml element as HTML, using
    ``etree.tostring(el, method="html", encoding=_unicode)``.

    If skip_outer is true, the outermost start and end tags are cut off
    and the remaining text is stripped of surrounding whitespace.
    """
    assert not isinstance(el, basestring), (
        "You should pass in an element, not a string like %r" % el)
    serialized = etree.tostring(el, method="html", encoding=_unicode)
    if not skip_outer:
        return serialized
    # Cut everything up to the first '>' (the outer start tag) ...
    inner = serialized[serialized.find('>')+1:]
    # ... and everything from the last '<' on (the outer end tag).
    inner = inner[:inner.rfind('<')]
    return inner.strip()
|
||||
|
||||
def _fixup_ins_del_tags(doc):
    """In-place version of fixup_ins_del_tags for an lxml document.

    Every <ins>/<del> element containing a block-level tag has its
    markup moved inside the block(s), after which the outer element's
    own tag is dropped.
    """
    for tag in ('ins', 'del'):
        for el in doc.xpath('descendant-or-self::%s' % tag):
            if _contains_block_level_tag(el):
                _move_el_inside_block(el, tag=tag)
                el.drop_tag()
|
||||
|
||||
def _contains_block_level_tag(el):
    """True if el itself, or any of its descendants, is a block-level
    element (listed in block_level_tags or block_level_container_tags).
    """
    if el.tag in block_level_tags or el.tag in block_level_container_tags:
        return True
    return any(_contains_block_level_tag(child) for child in el)
|
||||
|
||||
def _move_el_inside_block(el, tag):
    """ helper for _fixup_ins_del_tags; wraps the contents of el in new
    ``tag`` elements (<ins> etc), pushing them down inside any
    block-level children.  el is modified in place. """
    if not any(_contains_block_level_tag(child) for child in el):
        # No block-level tags in any child: wrap all of el's contents
        # (text and children) in a single new element.
        children_tag = etree.Element(tag)
        children_tag.text = el.text
        el.text = None
        children_tag.extend(list(el))
        el[:] = [children_tag]
        return
    # Iterate over a copy, since el is modified while looping.
    for child in list(el):
        if _contains_block_level_tag(child):
            # Recurse into the block, then wrap the text following it.
            _move_el_inside_block(child, tag)
            if child.tail:
                tail_tag = etree.Element(tag)
                tail_tag.text = child.tail
                child.tail = None
                el.insert(el.index(child)+1, tail_tag)
        else:
            # Child without block content: wrap the child itself.
            child_tag = etree.Element(tag)
            el.replace(child, child_tag)
            child_tag.append(child)
    if el.text:
        # Wrap el's leading text as well.
        text_tag = etree.Element(tag)
        text_tag.text = el.text
        el.text = None
        el.insert(0, text_tag)
|
||||
|
||||
def _merge_element_contents(el):
    """
    Remove an element but splice its contents into its place; e.g.
    removing the <i> element from <p>Hi <i>there!</i></p> gives
    <p>Hi there!</p>
    """
    parent = el.getparent()
    leading_text = el.text or ''
    if el.tail:
        if not len(el):
            # No children: the tail directly follows the element's text.
            leading_text += el.tail
        else:
            # The tail goes after the last child.
            last_child = el[-1]
            last_child.tail = (last_child.tail or '') + el.tail
    index = parent.index(el)
    if leading_text:
        if index == 0:
            # First child: the text joins the parent's own text.
            parent.text = (parent.text or '') + leading_text
        else:
            # Otherwise it joins the tail of the preceding sibling.
            previous = parent[index-1]
            previous.tail = (previous.tail or '') + leading_text
    parent[index:index+1] = el.getchildren()
|
||||
|
||||
class InsensitiveSequenceMatcher(difflib.SequenceMatcher):
    """
    Acts like SequenceMatcher, but tries not to find very small equal
    blocks amidst large spans of changes
    """

    # Matching blocks of this size or smaller are discarded (the value
    # is lowered for short sequences, see get_matching_blocks).
    threshold = 2

    def get_matching_blocks(self):
        """Return the matching blocks, without the very small ones.

        The cut-off is ``min(self.threshold, size / 4)``, where size is
        the length of the shorter of the two sequences.  Zero-length
        blocks (such as the terminating sentinel block) are always kept.
        """
        # Use the shorter of BOTH sequences; comparing len(self.b) with
        # itself ignored a short ``a`` and dropped its only matches.
        size = min(len(self.a), len(self.b))
        threshold = min(self.threshold, size / 4)
        actual = difflib.SequenceMatcher.get_matching_blocks(self)
        return [item for item in actual
                if item[2] > threshold
                or not item[2]]
|
||||
|
||||
if __name__ == '__main__':
|
||||
from lxml.html import _diffcommand
|
||||
_diffcommand.main()
|
||||
|
||||
299
lib/python3.5/site-packages/lxml/html/formfill.py
Normal file
299
lib/python3.5/site-packages/lxml/html/formfill.py
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
from lxml.etree import XPath, ElementBase
|
||||
from lxml.html import fromstring, tostring, XHTML_NAMESPACE
|
||||
from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result
|
||||
from lxml.html import defs
|
||||
import copy
|
||||
|
||||
try:
|
||||
basestring
|
||||
except NameError:
|
||||
# Python 3
|
||||
basestring = str
|
||||
|
||||
__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
|
||||
'insert_errors', 'insert_errors_html',
|
||||
'DefaultErrorCreator']
|
||||
|
||||
class FormNotFound(LookupError):
    """Raised when the requested form cannot be located."""
|
||||
|
||||
# Forms whose ``name`` attribute equals $name.  The attribute axis (@name)
# is required: a bare ``name`` would test a child element called <name>.
_form_name_xpath = XPath(
    'descendant-or-self::form[@name=$name]|descendant-or-self::x:form[@name=$name]',
    namespaces={'x':XHTML_NAMESPACE})
# All form controls (plain HTML and XHTML-namespaced).
_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]),
                     namespaces={'x':XHTML_NAMESPACE})
# <label> elements whose ``for`` attribute equals $for_id.
_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]',
                         namespaces={'x':XHTML_NAMESPACE})
# Any element whose ``name`` attribute equals $name.
_name_xpath = XPath('descendant-or-self::*[@name=$name]')
|
||||
|
||||
def fill_form(el, values, form_id=None, form_index=None):
    """Fill in the form found in ``el`` with ``values``, in place.

    The form is selected with ``_find_form`` using form_id / form_index.
    """
    form = _find_form(el, form_id=form_id, form_index=form_index)
    _fill_form(form, values)
|
||||
|
||||
def fill_form_html(html, values, form_id=None, form_index=None):
    """Like fill_form, but works on a copy and returns the result.

    A string is parsed with ``fromstring``; anything else is deep-copied.
    The filled document is converted back with ``_transform_result``
    based on the type of the input.
    """
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    fill_form(doc, values, form_id=form_id, form_index=form_index)
    return _transform_result(result_type, doc)
|
||||
|
||||
def _fill_form(el, values):
    """Fill every named input, select and textarea under ``el``.

    Controls that take multiple values (see _takes_multiple) receive the
    full list for their name.  For other controls sharing a name, a
    list/tuple value is handed out one item per control in document
    order; a scalar value only fills the first such control.
    """
    seen = {}  # name -> number of single-value controls already visited
    if hasattr(values, 'mixed'):
        # For Paste request parameters
        values = values.mixed()
    for field in _input_xpath(el):
        name = field.get('name')
        if not name:
            continue
        if _takes_multiple(field):
            chosen = values.get(name, [])
            if not isinstance(chosen, (list, tuple)):
                chosen = [chosen]
            _fill_multiple(field, chosen)
            continue
        if name not in values:
            continue
        position = seen.get(name, 0)
        seen[name] = position + 1
        chosen = values[name]
        if isinstance(chosen, (list, tuple)):
            try:
                chosen = chosen[position]
            except IndexError:
                # More controls than values: leave the rest untouched.
                continue
        elif position > 0:
            continue
        _fill_single(field, chosen)
|
||||
|
||||
def _takes_multiple(input):
    """True for controls filled from a list of values: a <select> with
    a ``multiple`` attribute, and radio / checkbox inputs."""
    if _nons(input.tag) == 'select' and input.get('multiple'):
        # FIXME: multiple="0"?
        return True
    input_type = input.get('type', '').lower()
    return input_type in ('radio', 'checkbox')
|
||||
|
||||
def _fill_multiple(input, value):
    """Check/select ``input`` according to ``value``, a list of values.

    Checkboxes and radio buttons with a ``value`` attribute are checked
    when that value is in the list.  A checkbox without a ``value``
    attribute is driven by the first list item.  For anything else
    (which must be a <select>) each option is selected when its value
    -- or its text content, if it has no value -- is in the list.
    """
    input_type = input.get('type', '').lower()
    if input_type == 'checkbox':
        v = input.get('value')
        if v is None:
            if not value:
                result = False
            else:
                result = value[0]
                # Test the extracted item, not the list: ``value`` is
                # never a string here, so the old check never fired.
                if isinstance(result, basestring):
                    # The only valid "on" value for an unnamed checkbox is 'on'
                    result = result == 'on'
            _check(input, result)
        else:
            _check(input, v in value)
    elif input_type == 'radio':
        v = input.get('value')
        _check(input, v in value)
    else:
        assert _nons(input.tag) == 'select'
        for option in _options_xpath(input):
            v = option.get('value')
            if v is None:
                # This seems to be the default, at least on IE
                # FIXME: but I'm not sure
                v = option.text_content()
            _select(option, v in value)
|
||||
|
||||
def _check(el, check):
|
||||
if check:
|
||||
el.set('checked', '')
|
||||
else:
|
||||
if 'checked' in el.attrib:
|
||||
del el.attrib['checked']
|
||||
|
||||
def _select(el, select):
|
||||
if select:
|
||||
el.set('selected', '')
|
||||
else:
|
||||
if 'selected' in el.attrib:
|
||||
del el.attrib['selected']
|
||||
|
||||
def _fill_single(input, value):
    """Store value in a single-value control: as the text of a
    <textarea>, otherwise as the ``value`` attribute."""
    if _nons(input.tag) != 'textarea':
        input.set('value', value)
        return
    input.text = value
|
||||
|
||||
def _find_form(el, form_id=None, form_index=None):
    """Locate a form under ``el``.

    With neither selector given, the first form is returned.  form_id
    is tried as an element id first, then as a form ``name``.
    form_index is an index into the list of forms.

    Raises FormNotFound when no matching form exists.
    """
    if form_id is None and form_index is None:
        forms = _forms_xpath(el)
        for form in forms:
            return form
        raise FormNotFound(
            "No forms in page")
    if form_id is not None:
        # Pass a default: without one, get_element_by_id raises KeyError
        # on a miss, which made the name lookup below unreachable.
        form = el.get_element_by_id(form_id, None)
        if form is not None:
            return form
        forms = _form_name_xpath(el, name=form_id)
        if forms:
            return forms[0]
        else:
            # Report the requested id (this used to format the builtin
            # ``id`` function by mistake).
            raise FormNotFound(
                "No form with the name or id of %r (forms: %s)"
                % (form_id, ', '.join(_find_form_ids(el))))
    if form_index is not None:
        forms = _forms_xpath(el)
        try:
            return forms[form_index]
        except IndexError:
            raise FormNotFound(
                "There is no form with the index %r (%i forms found)"
                % (form_index, len(forms)))
|
||||
|
||||
def _find_form_ids(el):
    """Yield a human-readable label for every form below *el*.

    Each label is the form's id, its name, ``"<id> or <name>"`` when both
    are set, or ``"(unnamed form N)"`` when neither is.  A single
    ``"(no forms)"`` entry is produced when there are no forms at all.
    """
    forms = _forms_xpath(el)
    if not forms:
        yield '(no forms)'
        return
    for index, form in enumerate(forms):
        form_id = form.get('id')
        form_name = form.get('name')
        if form_id and form_name:
            label = '%s or %s' % (form_id, form_name)
        elif form_id:
            label = form_id
        elif form_name:
            label = form_name
        else:
            label = '(unnamed form %s)' % index
        yield label
|
||||
|
||||
############################################################
|
||||
## Error filling
|
||||
############################################################
|
||||
|
||||
class DefaultErrorCreator(object):
    """Callable that builds an error element and attaches it to a form node.

    Every class attribute below is an option that may be overridden per
    instance through a keyword argument of the same name.
    """

    # Place the error before (True) or after (False) the target.
    insert_before = True
    # For block targets, put the error inside the target rather than
    # next to it.
    block_inside = True
    # Tag name of the generated error element.
    error_container_tag = 'div'
    # CSS class set on every generated error element (skipped if empty).
    error_message_class = 'error-message'
    # Extra CSS class appended for block targets (skipped if empty).
    error_block_class = 'error-block'
    # Text used when the supplied message is None or empty.
    default_message = "Invalid"

    def __init__(self, **kw):
        """Override options; unknown option names raise ``TypeError``."""
        for option in kw:
            if not hasattr(self, option):
                raise TypeError(
                    "Unexpected keyword argument: %s" % option)
            setattr(self, option, kw[option])

    def __call__(self, el, is_block, message):
        """Create the error element for *message* and insert it at *el*.

        *message* may be a string, an element, or None/empty (in which
        case ``default_message`` is used).
        """
        error_el = el.makeelement(self.error_container_tag)
        if self.error_message_class:
            error_el.set('class', self.error_message_class)
        if is_block and self.error_block_class:
            combined = error_el.get('class', '')+' '+self.error_block_class
            error_el.set('class', combined)

        if message is None or message == '':
            message = self.default_message
        if not isinstance(message, ElementBase):
            assert isinstance(message, basestring), (
                "Bad message; should be a string or element: %r" % message)
            error_el.text = message or self.default_message
        else:
            error_el.append(message)

        if is_block and self.block_inside:
            if not self.insert_before:
                el.append(error_el)
            else:
                # The error becomes the first child, so the text that used
                # to lead the target now has to follow the error element.
                error_el.tail = el.text
                el.text = None
                el.insert(0, error_el)
            return

        parent = el.getparent()
        position = parent.index(el)
        if not self.insert_before:
            # Inserting after the target: its tail text moves behind the
            # error element.
            error_el.tail = el.tail
            el.tail = None
            position += 1
        parent.insert(position, error_el)
|
||||
|
||||
default_error_creator = DefaultErrorCreator()
|
||||
|
||||
|
||||
def insert_errors(
    el,
    errors,
    form_id=None,
    form_index=None,
    error_class="error",
    error_creator=default_error_creator,
    ):
    """Insert error messages into the form found below *el*.

    *errors* maps field names to messages; entries whose message is None
    are skipped.  The form is located with ``_find_form`` and each
    (element, message) pair produced by ``_find_elements_for_name`` is
    handed to ``_insert_error``.
    """
    form = _find_form(el, form_id=form_id, form_index=form_index)
    for name, error in errors.items():
        if error is not None:
            for target, message in _find_elements_for_name(form, name, error):
                assert isinstance(message, (basestring, type(None), ElementBase)), (
                    "Bad message: %r" % message)
                _insert_error(target, message, error_class, error_creator)
|
||||
|
||||
def insert_errors_html(html, values, **kw):
    """Insert errors into *html* without modifying the caller's object.

    A string input is parsed with ``fromstring``; any other input is
    deep-copied.  Extra keyword arguments go to ``insert_errors``.  The
    result is converted back through ``_transform_result`` using the type
    of the original input.
    """
    result_type = type(html)
    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
    insert_errors(doc, values, **kw)
    return _transform_result(result_type, doc)
|
||||
|
||||
def _insert_error(el, error, error_class, error_creator):
    """Mark *el* (and its labels) as erroneous and create the error node.

    Empty tags and ``textarea`` are treated as non-block targets.  The
    *error_class* is added to every element except ``form`` itself, and
    to any label found by ``_label_for_xpath`` for the element's id.
    """
    tag = _nons(el.tag)
    is_block = not (tag in defs.empty_tags or tag == 'textarea')
    if tag != 'form' and error_class:
        _add_class(el, error_class)
    el_id = el.get('id')
    if el_id:
        for label in _label_for_xpath(el, for_id=el_id) or ():
            _add_class(label, error_class)
    error_creator(el, is_block, error)
|
||||
|
||||
def _add_class(el, class_name):
|
||||
if el.get('class'):
|
||||
el.set('class', el.get('class')+' '+class_name)
|
||||
else:
|
||||
el.set('class', class_name)
|
||||
|
||||
def _find_elements_for_name(form, name, error):
|
||||
if name is None:
|
||||
# An error for the entire form
|
||||
yield form, error
|
||||
return
|
||||
if name.startswith('#'):
|
||||
# By id
|
||||
el = form.get_element_by_id(name[1:])
|
||||
if el is not None:
|
||||
yield el, error
|
||||
return
|
||||
els = _name_xpath(form, name=name)
|
||||
if not els:
|
||||
# FIXME: should this raise an exception?
|
||||
return
|
||||
if not isinstance(error, (list, tuple)):
|
||||
yield els[0], error
|
||||
return
|
||||
# FIXME: if error is longer than els, should it raise an error?
|
||||
for el, err in zip(els, error):
|
||||
if err is None:
|
||||
continue
|
||||
yield el, err
|
||||
207
lib/python3.5/site-packages/lxml/html/html5parser.py
Normal file
207
lib/python3.5/site-packages/lxml/html/html5parser.py
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
"""
|
||||
An interface to html5lib that mimics the lxml.html interface.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import string
|
||||
|
||||
from html5lib import HTMLParser as _HTMLParser
|
||||
from html5lib.treebuilders.etree_lxml import TreeBuilder
|
||||
|
||||
from lxml import etree
|
||||
from lxml.html import _contains_block_level_tag, XHTML_NAMESPACE, Element
|
||||
|
||||
# python3 compatibility
|
||||
try:
|
||||
_strings = basestring
|
||||
except NameError:
|
||||
_strings = (bytes, str)
|
||||
try:
|
||||
from urllib2 import urlopen
|
||||
except ImportError:
|
||||
from urllib.request import urlopen
|
||||
try:
|
||||
from urlparse import urlparse
|
||||
except ImportError:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
class HTMLParser(_HTMLParser):
    """html5lib's HTML parser, configured to build lxml trees."""

    def __init__(self, strict=False, **kwargs):
        """Create the parser.

        *strict* and any extra keyword arguments are forwarded to the
        html5lib parser; the tree builder is always the lxml one.
        """
        _HTMLParser.__init__(
            self,
            strict=strict,
            tree=TreeBuilder,
            **kwargs)
|
||||
|
||||
|
||||
# XHTMLParser is only defined when html5lib provides one; otherwise
# neither the class nor ``xhtml_parser`` exists in this module.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """html5lib's XHTML parser, configured to build lxml trees."""

        def __init__(self, strict=False, **kwargs):
            """Forward *strict* and extra keywords; force the lxml tree builder."""
            _XHTMLParser.__init__(
                self,
                strict=strict,
                tree=TreeBuilder,
                **kwargs)

    xhtml_parser = XHTMLParser()
|
||||
|
||||
|
||||
def _find_tag(tree, tag):
|
||||
elem = tree.find(tag)
|
||||
if elem is not None:
|
||||
return elem
|
||||
return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
|
||||
|
||||
|
||||
def document_fromstring(html, guess_charset=True, parser=None):
    """Parse a string containing a whole document and return its root.

    *guess_charset* is passed to the parser as ``useChardet``.  The
    module-level ``html_parser`` is used when *parser* is None.  Raises
    ``TypeError`` if *html* is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser
    tree = active_parser.parse(html, useChardet=guess_charset)
    return tree.getroot()
|
||||
|
||||
|
||||
def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=False, parser=None):
    """Parse several HTML elements and return them as a list.

    The first list item may be a string (leading text).  With
    *no_leading_text* set, non-whitespace leading text raises
    ``etree.ParserError`` and whitespace-only leading text is dropped,
    so the result holds elements only.

    If `guess_charset` is `True` and the text was not unicode but a
    bytestring, the `chardet` library will perform charset guessing on
    the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser
    children = active_parser.parseFragment(
        html, 'div', useChardet=guess_charset)

    has_leading_text = bool(children) and isinstance(children[0], _strings)
    if has_leading_text and no_leading_text:
        leading = children[0]
        if leading.strip():
            raise etree.ParserError('There is leading text: %r' %
                                    leading)
        del children[0]
    return children
|
||||
|
||||
|
||||
def fragment_fromstring(html, create_parent=False,
                        guess_charset=False, parser=None):
    """Parse a single HTML element.

    Without *create_parent* it is an error (``etree.ParserError``) if
    there is no element, more than one element, or anything but
    whitespace before or after the element.

    With a true *create_parent* (a tag name, or any other true value for
    ``div``) the parsed content is wrapped in a new parent element, and
    leading or trailing text is allowed.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    elements = fragments_fromstring(
        html, guess_charset=guess_charset, parser=parser,
        no_leading_text=not bool(create_parent))

    if not create_parent:
        if not elements:
            raise etree.ParserError('No elements found')
        if len(elements) > 1:
            raise etree.ParserError('Multiple elements found')
        result = elements[0]
        if result.tail and result.tail.strip():
            raise etree.ParserError('Element followed by text: %r' % result.tail)
        result.tail = None
        return result

    # Any true non-string value selects the default wrapper tag.
    parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
    new_root = Element(parent_tag)
    if elements:
        if isinstance(elements[0], _strings):
            # Leading text becomes the text of the new parent.
            new_root.text = elements[0]
            del elements[0]
        new_root.extend(elements)
    return new_root
|
||||
|
||||
|
||||
def fromstring(html, guess_charset=True, parser=None):
    """Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document:

    * input starting with ``<html`` or ``<!doctype`` (within its first 50
      characters), or producing a non-empty ``<head>``, yields the whole
      document root;
    * a body holding exactly one element and no surrounding text yields
      that element;
    * otherwise the body itself is returned, retagged as ``div`` (if it
      contains block-level tags) or ``span``.

    Raises ``TypeError`` if *html* is not a string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    doc = document_fromstring(html, parser=parser,
                              guess_charset=guess_charset)

    # document starts with doctype or <html>, full document!
    start = html[:50]
    if isinstance(start, bytes):
        # On Python 3, bytes.startswith() rejects str prefixes.  Decode as
        # ASCII with replacement: that is enough for the markers compared
        # below, whatever the real encoding is.
        start = start.decode('ascii', 'replace')
    start = start.lstrip().lower()
    if start.startswith('<html') or start.startswith('<!doctype'):
        return doc

    head = _find_tag(doc, 'head')

    # if the head is not empty we have a full document
    if len(head):
        return doc

    body = _find_tag(doc, 'body')

    # The body has just one element, so it was probably a single
    # element passed in
    if (len(body) == 1 and (not body.text or not body.text.strip())
        and (not body[-1].tail or not body[-1].tail.strip())):
        return body[0]

    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    if _contains_block_level_tag(body):
        body.tag = 'div'
    else:
        body.tag = 'span'
    return body
|
||||
|
||||
|
||||
def parse(filename_url_or_file, guess_charset=True, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    A non-string argument is treated as a file-like object and left open
    for the caller.  A string is opened here (via ``urlopen`` if it looks
    like a URL, else as a binary file) and closed again once parsing has
    finished.
    """
    if parser is None:
        parser = html_parser
    if not isinstance(filename_url_or_file, _strings):
        # Caller-owned stream: never close it on the caller's behalf.
        return parser.parse(filename_url_or_file, useChardet=guess_charset)

    if _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
    else:
        fp = open(filename_url_or_file, 'rb')
    try:
        return parser.parse(fp, useChardet=guess_charset)
    finally:
        # We opened the handle, so we release it (previously leaked).
        fp.close()
|
||||
|
||||
|
||||
def _looks_like_url(str):
|
||||
scheme = urlparse(str)[0]
|
||||
if not scheme:
|
||||
return False
|
||||
elif (sys.platform == 'win32' and
|
||||
scheme in string.ascii_letters
|
||||
and len(scheme) == 1):
|
||||
# looks like a 'normal' absolute path
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
html_parser = HTMLParser()
|
||||
303
lib/python3.5/site-packages/lxml/html/soupparser.py
Normal file
303
lib/python3.5/site-packages/lxml/html/soupparser.py
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
"""External interface to the BeautifulSoup HTML parser.
|
||||
"""
|
||||
|
||||
__all__ = ["fromstring", "parse", "convert_tree"]
|
||||
|
||||
import re
|
||||
from lxml import etree, html
|
||||
|
||||
try:
|
||||
from bs4 import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration, CData, Doctype)
|
||||
_DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
|
||||
except ImportError:
|
||||
from BeautifulSoup import (
|
||||
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
||||
Declaration, CData)
|
||||
_DECLARATION_OR_DOCTYPE = Declaration
|
||||
|
||||
|
||||
def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a string of HTML data into an Element tree using the
    BeautifulSoup parser.

    Returns the root ``<html>`` Element of the tree.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Remaining keyword arguments are passed to the parser.
    """
    root = _parse(data, beautifulsoup, makeelement, **bsargs)
    return root
|
||||
|
||||
|
||||
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    *file* is either an object with a ``read`` method, which is used
    as-is and left open, or a path, which is opened here and closed again
    after parsing.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.
    """
    if hasattr(file, 'read'):
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    else:
        opened = open(file)
        try:
            root = _parse(opened, beautifulsoup, makeelement, **bsargs)
        finally:
            # We opened the handle, so we release it (previously leaked).
            opened.close()
    return etree.ElementTree(root)
|
||||
|
||||
|
||||
def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.  The returned
    elements are detached from the temporary root they were built under.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    root = _convert_tree(beautiful_soup_tree, makeelement)
    # Snapshot the children first; removing while iterating the live
    # element would skip entries.
    detached = list(root)
    for element in detached:
        root.remove(element)
    return detached
|
||||
|
||||
|
||||
# helpers
|
||||
|
||||
def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Run the soup parser on *source* and return an ``html`` root element.

    Parser-specific defaults are filled into *bsargs* only when the
    caller has not supplied them.
    """
    soup_factory = BeautifulSoup if beautifulsoup is None else beautifulsoup
    if hasattr(soup_factory, "HTML_ENTITIES"):  # bs3
        bsargs.setdefault('convertEntities', 'html')
    if hasattr(soup_factory, "DEFAULT_BUILDER_FEATURES"):  # bs4
        # use Python html parser
        bsargs.setdefault('features', ['html.parser'])

    soup = soup_factory(source, **bsargs)
    root = _convert_tree(soup, makeelement)

    # from ET: wrap the document in a html root element, if necessary
    already_wrapped = len(root) == 1 and root[0].tag == "html"
    if already_wrapped:
        return root[0]
    root.tag = "html"
    return root
|
||||
|
||||
|
||||
_parse_doctype_declaration = re.compile(
|
||||
r'(?:\s|[<!])*DOCTYPE\s*HTML'
|
||||
r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
|
||||
r'(?:\s+(\'[^\']*\'|"[^"]*"))?',
|
||||
re.IGNORECASE).match
|
||||
|
||||
|
||||
class _PseudoTag:
|
||||
# Minimal imitation of BeautifulSoup.Tag
|
||||
def __init__(self, contents):
|
||||
self.name = 'html'
|
||||
self.attrs = []
|
||||
self.contents = contents
|
||||
|
||||
def __iter__(self):
|
||||
return self.contents.__iter__()
|
||||
|
||||
|
||||
def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a whole BeautifulSoup tree into an element tree.

    Returns the converted ``html`` root element.  Nodes before and after
    the root element(s) are attached as siblings of the root, and a
    DOCTYPE declaration, if present and parseable, updates the tree's
    ``docinfo``.  *makeelement* defaults to the factory of
    ``html.html_parser``.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    # declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    # instructions, whitespace
    first_element_idx = last_element_idx = None
    html_root = declaration = None
    for i, e in enumerate(beautiful_soup_tree):
        if isinstance(e, Tag):
            if first_element_idx is None:
                first_element_idx = i
            last_element_idx = i
            if html_root is None and e.name and e.name.lower() == 'html':
                html_root = e
        elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE):
            declaration = e

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    if first_element_idx is None:
        # No element at all (e.g. empty or text-only input): there is
        # nothing to split around, and ``last_element_idx + 1`` would
        # fail on None.  Everything goes under the synthetic root.
        pre_root = post_root = []
        roots = beautiful_soup_tree.contents
    else:
        pre_root = beautiful_soup_tree.contents[:first_element_idx]
        roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1]
        post_root = beautiful_soup_tree.contents[last_element_idx+1:]

    # Reorganize so that there is one <html> root...
    if html_root is not None:
        # ... use existing one if possible, ...
        i = roots.index(html_root)
        html_root.contents = roots[:i] + html_root.contents + roots[i+1:]
    else:
        # ... otherwise create a new one.
        html_root = _PseudoTag(roots)

    convert_node = _init_node_converters(makeelement)

    # Process pre_root
    res_root = convert_node(html_root)
    prev = res_root
    for e in reversed(pre_root):
        converted = convert_node(e)
        if converted is not None:
            prev.addprevious(converted)
            prev = converted

    # ditto for post_root
    prev = res_root
    for e in post_root:
        converted = convert_node(e)
        if converted is not None:
            prev.addnext(converted)
            prev = converted

    if declaration is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = declaration.output_ready()
        except AttributeError:
            doctype_string = declaration.string

        match = _parse_doctype_declaration(doctype_string)
        # Something is wrong if there is no match.  Since soupparser
        # should tolerate errors, do not raise; just leave docinfo alone.
        if match:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root
|
||||
|
||||
|
||||
def _init_node_converters(makeelement):
    """Build and return the ``convert_node(bs_node, parent=None)`` function.

    The returned function converts one BeautifulSoup node (and, for tags,
    its subtree), attaching the result to *parent* when one is given.
    Root elements are created with *makeelement*.  Handlers are looked up
    by exact node type first and, failing that, by ``isinstance`` in
    registration order; the outcome is cached per type.
    """
    handlers_by_type = {}
    registration_order = []

    def register(handler, *node_types):
        for node_type in node_types:
            handlers_by_type[node_type] = handler
            registration_order.append(node_type)

    def find_best_converter(node):
        for node_type in registration_order:
            if isinstance(node, node_type):
                return handlers_by_type[node_type]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        node_type = type(bs_node)
        if node_type in handlers_by_type:
            handler = handlers_by_type[node_type]
        else:
            # Cache the lookup result (possibly None) for this exact type.
            handler = handlers_by_type[node_type] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        if not isinstance(bs_attrs, dict):
            # non-dict attrs: an iterable of (name, value) pairs
            return dict((key, unescape(value)) for key, value in bs_attrs)
        # bs4
        mapped = {}
        for key, value in bs_attrs.items():
            if isinstance(value, list):
                value = " ".join(value)
            mapped[key] = unescape(value)
        return mapped

    def append_text(parent, text):
        if len(parent):
            # Text after the last child lives in that child's tail.
            last_child = parent[-1]
            last_child.tail = (last_child.tail or '') + text
        else:
            parent.text = (parent.text or '') + text

    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is None:
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)
        else:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            child_type = type(child)
            if child_type in handlers_by_type:
                handler = handlers_by_type[child_type]
                if handler is not None:
                    handler(child, res)
            else:
                convert_node(child, res)
        return res

    def convert_comment(bs_node, parent):
        res = etree.Comment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>).  Fix.
            bs_node = bs_node[:-1]
        target_and_data = bs_node.split(' ', 1)
        res = etree.ProcessingInstruction(*target_and_data)
        if parent is not None:
            parent.append(res)
        return res

    def convert_text(bs_node, parent):
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    # converters are tried in order of their registration
    register(convert_tag, Tag, _PseudoTag)
    register(convert_comment, Comment)
    register(convert_pi, ProcessingInstruction)
    register(convert_text, NavigableString)

    return convert_node
|
||||
|
||||
|
||||
# copied from ET's ElementSoup
|
||||
|
||||
try:
|
||||
from html.entities import name2codepoint # Python 3
|
||||
except ImportError:
|
||||
from htmlentitydefs import name2codepoint
|
||||
|
||||
|
||||
# Substitution function for named entity references such as "&amp;".
# Raw string: "\w" in a normal string literal is an invalid escape.
handle_entities = re.compile(r"&(\w+);").sub


# ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def unescape(string):
    """Replace known named HTML entities in *string* with their characters.

    Entity names missing from ``name2codepoint`` are left untouched.
    A false *string* (None or empty) yields ``''``.
    """
    if not string:
        return ''

    # work around oddities in BeautifulSoup's entity handling
    def unescape_entity(m):
        try:
            return _unichr(name2codepoint[m.group(1)])
        except KeyError:
            return m.group(0)  # use as is

    return handle_entities(unescape_entity, string)
|
||||
13
lib/python3.5/site-packages/lxml/html/usedoctest.py
Normal file
13
lib/python3.5/site-packages/lxml/html/usedoctest.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Doctest module for HTML comparison.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import lxml.html.usedoctest
|
||||
>>> # now do your HTML doctests ...
|
||||
|
||||
See `lxml.doctestcompare`.
|
||||
"""
|
||||
|
||||
from lxml import doctestcompare
|
||||
|
||||
doctestcompare.temp_install(html=True, del_module=__name__)
|
||||
0
lib/python3.5/site-packages/lxml/includes/__init__.py
Normal file
0
lib/python3.5/site-packages/lxml/includes/__init__.py
Normal file
26
lib/python3.5/site-packages/lxml/includes/c14n.pxd
Normal file
26
lib/python3.5/site-packages/lxml/includes/c14n.pxd
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar
|
||||
from lxml.includes.xpath cimport xmlNodeSet
|
||||
|
||||
cdef extern from "libxml/c14n.h":
|
||||
cdef int xmlC14NDocDumpMemory(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
xmlChar** doc_txt_ptr) nogil
|
||||
|
||||
cdef int xmlC14NDocSave(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
char* filename,
|
||||
int compression) nogil
|
||||
|
||||
cdef int xmlC14NDocSaveTo(xmlDoc* doc,
|
||||
xmlNodeSet* nodes,
|
||||
int exclusive,
|
||||
xmlChar** inclusive_ns_prefixes,
|
||||
int with_comments,
|
||||
xmlOutputBuffer* buffer) nogil
|
||||
|
||||
3
lib/python3.5/site-packages/lxml/includes/config.pxd
Normal file
3
lib/python3.5/site-packages/lxml/includes/config.pxd
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
cdef extern from "etree_defs.h":
|
||||
cdef bint ENABLE_THREADING
|
||||
cdef bint ENABLE_SCHEMATRON
|
||||
18
lib/python3.5/site-packages/lxml/includes/dtdvalid.pxd
Normal file
18
lib/python3.5/site-packages/lxml/includes/dtdvalid.pxd
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport xmlDoc, xmlDtd
|
||||
|
||||
cdef extern from "libxml/valid.h" nogil:
|
||||
ctypedef void (*xmlValidityErrorFunc)(void * ctx, const char * msg, ...)
|
||||
ctypedef void (*xmlValidityWarningFunc)(void * ctx, const char * msg, ...)
|
||||
|
||||
ctypedef struct xmlValidCtxt:
|
||||
void *userData
|
||||
xmlValidityErrorFunc error
|
||||
xmlValidityWarningFunc warning
|
||||
|
||||
cdef xmlValidCtxt* xmlNewValidCtxt()
|
||||
cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)
|
||||
|
||||
cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
|
||||
cdef tree.xmlElement* xmlGetDtdElementDesc(
|
||||
xmlDtd* dtd, tree.const_xmlChar* name)
|
||||
373
lib/python3.5/site-packages/lxml/includes/etree_defs.h
Normal file
373
lib/python3.5/site-packages/lxml/includes/etree_defs.h
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
#ifndef HAS_ETREE_DEFS_H
|
||||
#define HAS_ETREE_DEFS_H
|
||||
|
||||
/* quick check for Python/libxml2/libxslt devel setup */
|
||||
#include "Python.h"
|
||||
#ifndef PY_VERSION_HEX
|
||||
# error the development package of Python (header files etc.) is not installed correctly
|
||||
#else
|
||||
# if PY_VERSION_HEX < 0x02060000 || PY_MAJOR_VERSION >= 3 && PY_VERSION_HEX < 0x03020000
|
||||
# error this version of lxml requires Python 2.6, 2.7, 3.2 or later
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "libxml/xmlversion.h"
|
||||
#ifndef LIBXML_VERSION
|
||||
# error the development package of libxml2 (header files etc.) is not installed correctly
|
||||
#else
|
||||
#if LIBXML_VERSION < 20700
|
||||
# error minimum required version of libxml2 is 2.7.0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "libxslt/xsltconfig.h"
|
||||
#ifndef LIBXSLT_VERSION
|
||||
# error the development package of libxslt (header files etc.) is not installed correctly
|
||||
#else
|
||||
#if LIBXSLT_VERSION < 10123
|
||||
# error minimum required version of libxslt is 1.1.23
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* v_arg functions */
|
||||
#define va_int(ap) va_arg(ap, int)
|
||||
#define va_charptr(ap) va_arg(ap, char *)
|
||||
|
||||
#ifdef PYPY_VERSION
|
||||
# define IS_PYPY 1
|
||||
#else
|
||||
# define IS_PYPY 0
|
||||
#endif
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
# define IS_PYTHON3 1
|
||||
#else
|
||||
# define IS_PYTHON3 0
|
||||
#endif
|
||||
|
||||
#if IS_PYTHON3
|
||||
#undef LXML_UNICODE_STRINGS
|
||||
#define LXML_UNICODE_STRINGS 1
|
||||
#else
|
||||
#ifndef LXML_UNICODE_STRINGS
|
||||
#define LXML_UNICODE_STRINGS 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !IS_PYPY
|
||||
# define PyWeakref_LockObject(obj) (NULL)
|
||||
#endif
|
||||
|
||||
/* Threading is not currently supported by PyPy */
|
||||
#if IS_PYPY
|
||||
# ifndef WITHOUT_THREADING
|
||||
# define WITHOUT_THREADING
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if IS_PYPY
|
||||
# undef PyFile_AsFile
|
||||
# define PyFile_AsFile(o) (NULL)
|
||||
# undef PyByteArray_Check
|
||||
# define PyByteArray_Check(o) (0)
|
||||
#elif IS_PYTHON3
|
||||
/* Python 3 doesn't have PyFile_*() anymore */
|
||||
# define PyFile_AsFile(o) (NULL)
|
||||
#endif
|
||||
|
||||
#if PY_VERSION_HEX <= 0x03030000 && !(defined(CYTHON_PEP393_ENABLED) && CYTHON_PEP393_ENABLED)
|
||||
#define PyUnicode_IS_READY(op) (0)
|
||||
#define PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
|
||||
#define PyUnicode_KIND(u) (sizeof(Py_UNICODE))
|
||||
#define PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u))
|
||||
#endif
|
||||
|
||||
#if IS_PYPY
|
||||
# ifndef PyUnicode_FromFormat
|
||||
# define PyUnicode_FromFormat PyString_FromFormat
|
||||
# endif
|
||||
# if IS_PYTHON3 && !defined(PyBytes_FromFormat)
|
||||
# ifdef PyString_FromFormat
|
||||
# define PyBytes_FromFormat PyString_FromFormat
|
||||
# else
|
||||
#include <stdarg.h>
|
||||
static PyObject* PyBytes_FromFormat(const char* format, ...) {
|
||||
PyObject *string;
|
||||
va_list vargs;
|
||||
#ifdef HAVE_STDARG_PROTOTYPES
|
||||
va_start(vargs, format);
|
||||
#else
|
||||
va_start(vargs);
|
||||
#endif
|
||||
string = PyUnicode_FromFormatV(format, vargs);
|
||||
va_end(vargs);
|
||||
if (string && PyUnicode_Check(string)) {
|
||||
PyObject *bstring = PyUnicode_AsUTF8String(string);
|
||||
Py_DECREF(string);
|
||||
string = bstring;
|
||||
}
|
||||
if (string && !PyBytes_CheckExact(string)) {
|
||||
Py_DECREF(string);
|
||||
string = NULL;
|
||||
PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object");
|
||||
}
|
||||
return string;
|
||||
}
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* PySlice_GetIndicesEx() has wrong signature in Py<=3.1 */
|
||||
#if PY_VERSION_HEX >= 0x03020000
|
||||
# define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(o, l, b, e, s, sl)
|
||||
#else
|
||||
# define _lx_PySlice_GetIndicesEx(o, l, b, e, s, sl) PySlice_GetIndicesEx(((PySliceObject*)o), l, b, e, s, sl)
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# undef PyEval_SaveThread
|
||||
# define PyEval_SaveThread() (NULL)
|
||||
# undef PyEval_RestoreThread
|
||||
# define PyEval_RestoreThread(state) if (state); else {}
|
||||
# undef PyGILState_Ensure
|
||||
# define PyGILState_Ensure() (PyGILState_UNLOCKED)
|
||||
# undef PyGILState_Release
|
||||
# define PyGILState_Release(state) if (state); else {}
|
||||
# undef Py_UNBLOCK_THREADS
|
||||
# define Py_UNBLOCK_THREADS _save = NULL;
|
||||
# undef Py_BLOCK_THREADS
|
||||
# define Py_BLOCK_THREADS if (_save); else {}
|
||||
#endif
|
||||
|
||||
#ifdef WITHOUT_THREADING
|
||||
# define ENABLE_THREADING 0
|
||||
#else
|
||||
# define ENABLE_THREADING 1
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20704
|
||||
/* FIXME: hack to make new error reporting compile in old libxml2 versions */
|
||||
# define xmlStructuredErrorContext NULL
|
||||
# define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o)
|
||||
#endif
|
||||
|
||||
/* schematron was added in libxml2 2.6.21 */
|
||||
#ifdef LIBXML_SCHEMATRON_ENABLED
|
||||
# define ENABLE_SCHEMATRON 1
|
||||
#else
|
||||
# define ENABLE_SCHEMATRON 0
|
||||
# define XML_SCHEMATRON_OUT_QUIET 0
|
||||
# define XML_SCHEMATRON_OUT_XML 0
|
||||
# define XML_SCHEMATRON_OUT_ERROR 0
|
||||
typedef void xmlSchematron;
|
||||
typedef void xmlSchematronParserCtxt;
|
||||
typedef void xmlSchematronValidCtxt;
|
||||
# define xmlSchematronNewDocParserCtxt(doc) NULL
|
||||
# define xmlSchematronNewParserCtxt(file) NULL
|
||||
# define xmlSchematronParse(ctxt) NULL
|
||||
# define xmlSchematronFreeParserCtxt(ctxt)
|
||||
# define xmlSchematronFree(schema)
|
||||
# define xmlSchematronNewValidCtxt(schema, options) NULL
|
||||
# define xmlSchematronValidateDoc(ctxt, doc) 0
|
||||
# define xmlSchematronFreeValidCtxt(ctxt)
|
||||
# define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data)
|
||||
#endif
|
||||
|
||||
#if LIBXML_VERSION < 20900
|
||||
# define XML_PARSE_BIG_LINES 4194304
|
||||
#endif
|
||||
|
||||
#include "libxml/tree.h"
|
||||
#ifndef LIBXML2_NEW_BUFFER
|
||||
typedef xmlBuffer xmlBuf;
|
||||
# define xmlBufContent(buf) xmlBufferContent(buf)
|
||||
# define xmlBufUse(buf) xmlBufferLength(buf)
|
||||
#endif
|
||||
|
||||
/* libexslt 1.1.25+ support EXSLT functions in XPath */
|
||||
#if LIBXSLT_VERSION < 10125
|
||||
#define exsltDateXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltSetsXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltMathXpathCtxtRegister(ctxt, prefix)
|
||||
#define exsltStrXpathCtxtRegister(ctxt, prefix)
|
||||
#endif
|
||||
|
||||
/* work around MSDEV 6.0 */
/* Only for _MSC_VER == 1200 with WINVER < 0x0500: provide _ftol2() as a
 * plain forwarder to _ftol().
 * NOTE(review): presumably needed because objects built by newer compilers
 * reference _ftol2, which the VC6 runtime lacks - confirm before removing. */
|
||||
#if (_MSC_VER == 1200) && (WINVER < 0x0500)
|
||||
long _ftol( double ); /* defined by VC6 C libs */
|
||||
long _ftol2( double dblSource ) { return _ftol( dblSource ); }
|
||||
#endif
|
||||
|
||||
/* Branch hint for conditions that are expected to be false.
 *
 * With GCC newer than 2.95 the condition is wrapped in __builtin_expect()
 * with an expected value of 0; with any other compiler the macro expands to
 * the parenthesised condition itself.
 */
#if defined(__GNUC__) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
#  define unlikely_condition(x) __builtin_expect((x), 0)
#else
#  define unlikely_condition(x) (x)
#endif
|
||||
|
||||
/* Fallback for Python headers that do not provide Py_TYPE():
 * read the ob_type member after casting to PyObject*. */
#if !defined(Py_TYPE)
#  define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
#endif
|
||||
|
||||
/* Create an instance of type T by calling its tp_new slot directly, passing
 * __pyx_empty_tuple as the positional arguments and NULL as the keyword
 * arguments.  Only tp_new is called by this macro.  T is expanded twice. */
#define PY_NEW(T) \
|
||||
(((PyTypeObject*)(T))->tp_new( \
|
||||
(PyTypeObject*)(T), __pyx_empty_tuple, NULL))
|
||||
|
||||
/* Evaluates to the tp_name string of the type of object o. */
#define _fqtypename(o) ((Py_TYPE(o))->tp_name)
|
||||
|
||||
/* Overflow-checked allocation of 'count' items of 'item_size' bytes each.
 *
 * Evaluates to NULL, without calling the allocator, when 'count' exceeds
 * PY_SSIZE_T_MAX / item_size; otherwise evaluates to the result of
 * PyMem_Malloc((count) * (item_size)).
 *
 * Both arguments are parenthesised in the expansion so that compound
 * expressions such as "sizeof(a) + sizeof(b)" keep their intended
 * precedence in the bound check and in the size computation.
 * Each argument is expanded twice, so avoid arguments with side effects.
 */
#define lxml_malloc(count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Malloc((count) * (item_size))))
|
||||
|
||||
/* Overflow-checked resize of 'mem' to 'count' items of 'item_size' bytes.
 *
 * Evaluates to NULL, without calling the allocator (so 'mem' is left
 * untouched), when 'count' exceeds PY_SSIZE_T_MAX / item_size; otherwise
 * evaluates to the result of PyMem_Realloc(mem, (count) * (item_size)).
 *
 * 'item_size' is parenthesised in the expansion so that compound
 * expressions such as "sizeof(a) + sizeof(b)" keep their intended
 * precedence in the bound check and in the size computation.
 * 'count' and 'item_size' are expanded twice, so avoid side effects.
 */
#define lxml_realloc(mem, count, item_size) \
    (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / (item_size))) ? NULL : \
     (PyMem_Realloc(mem, (count) * (item_size))))
|
||||
|
||||
/* Release memory through PyMem_Free(), the counterpart of the PyMem_*
 * allocators used by lxml_malloc() and lxml_realloc(). */
#define lxml_free(mem) PyMem_Free(mem)
|
||||
|
||||
/* _isString(obj): non-zero if obj is a byte string or a unicode string. */
#if PY_MAJOR_VERSION >= 3
/* builtin subtype type checks are almost as fast as exact checks in Py2.7+
 * and Unicode is more common in Py3, so it is tested first */
#  define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj))
#else
/* Py2: try the two exact type checks first, then fall back to a
 * basestring subtype test */
#  define _isString(obj) \
    (PyString_CheckExact(obj) || \
     PyUnicode_CheckExact(obj) || \
     PyType_IsSubtype(Py_TYPE(obj), &PyBaseString_Type))
#endif
|
||||
|
||||
/* Non-zero if the node's type is one of: element, comment, entity
 * reference or processing instruction.  The types are tested in that
 * order, and c_node is expanded once per test. */
#define _isElement(c_node) \
    ((c_node)->type == XML_ELEMENT_NODE || \
     (c_node)->type == XML_COMMENT_NODE || \
     (c_node)->type == XML_ENTITY_REF_NODE || \
     (c_node)->type == XML_PI_NODE)
|
||||
|
||||
/* Like _isElement(), but additionally accepts the XInclude start and end
 * marker node types. */
#define _isElementOrXInclude(c_node) \
    (_isElement(c_node) || \
     (c_node)->type == XML_XINCLUDE_START || \
     (c_node)->type == XML_XINCLUDE_END)
|
||||
|
||||
/* Namespace URI (ns->href) of the node, or 0 if the node has no namespace
 * attached. */
#define _getNs(c_node) \
    ((c_node)->ns ? (c_node)->ns->href : 0)
|
||||
|
||||
|
||||
/* Macro pair implementation of a depth first tree walker
|
||||
*
|
||||
* Calls the code block between the BEGIN and END macros for all elements
|
||||
* below c_tree_top (exclusively), starting at c_node (inclusively iff
|
||||
* 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes
|
||||
* that match _isElement(), the normal variant will stop on every node
|
||||
* it encounters, including text nodes (no node type filter is applied).
|
||||
*
|
||||
* To traverse the node and all of its children and siblings in Pyrex, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children and siblings of a node, call
|
||||
* cdef xmlNode* some_node
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* To traverse only the children, do:
|
||||
* cdef xmlNode* some_node
|
||||
* some_node = parent_node.children
|
||||
* BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1)
|
||||
* # do something with some_node
|
||||
* END_FOR_EACH_ELEMENT_FROM(some_node)
|
||||
*
|
||||
* NOTE: 'some_node' MUST be a plain 'xmlNode*' !
|
||||
*
|
||||
* NOTE: parent modification during the walk can divert the iterator, but
|
||||
* should not segfault !
|
||||
*/
|
||||
|
||||
/* Node filter of the tree walker: every node matches unless only_elements
 * is set, in which case the node must satisfy _isElement(). */
#define _LX__ELEMENT_MATCH(c_node, only_elements) \
    (!(only_elements) || _isElement(c_node))
|
||||
|
||||
/* Move c_node forward along its ->next chain until it points at a node
 * accepted by _LX__ELEMENT_MATCH(), or becomes 0 at the end of the chain.
 * c_node is assigned to, so it must be a plain pointer variable.
 * Expands to a complete statement; call sites use it without a ';'. */
#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \
    for (; (c_node != 0) && !_LX__ELEMENT_MATCH(c_node, only_elements); \
         c_node = c_node->next) { \
    }
|
||||
|
||||
/* One step of the depth-first walk: set c_node to the next node to visit,
 * or to 0 when the walk is finished.
 *
 * Candidates are tried in this order:
 *   1. the first matching child of c_node (children of entity reference
 *      and DTD nodes are never descended into),
 *   2. unless c_node is c_stop_node: the next matching sibling of c_node,
 *   3. the next matching sibling of each ancestor in turn.
 * Climbing in step 3 gives up (result 0) when the parent chain ends, when
 * the parent is c_stop_node, or - with only_elements set - when the parent
 * does not satisfy _isElement().
 *
 * c_node is read and assigned several times, so it must be a plain
 * pointer variable.
 */
#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \
|
||||
{ \
|
||||
/* walk through children first */ \
|
||||
xmlNode* _lx__next = c_node->children; \
|
||||
if (_lx__next != 0) { \
|
||||
if (c_node->type == XML_ENTITY_REF_NODE || c_node->type == XML_DTD_NODE) { \
|
||||
_lx__next = 0; \
|
||||
} else { \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
} \
|
||||
} \
|
||||
if ((_lx__next == 0) && (c_node != c_stop_node)) { \
|
||||
/* try siblings */ \
|
||||
_lx__next = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
/* back off through parents */ \
|
||||
while (_lx__next == 0) { \
|
||||
c_node = c_node->parent; \
|
||||
if (c_node == 0) \
|
||||
break; \
|
||||
if (c_node == c_stop_node) \
|
||||
break; \
|
||||
if ((only_elements) && !_isElement(c_node)) \
|
||||
break; \
|
||||
/* we already traversed the parents -> siblings */ \
|
||||
_lx__next = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \
|
||||
} \
|
||||
} \
|
||||
c_node = _lx__next; \
|
||||
}
|
||||
|
||||
/* Opening half of the tree walker.
 *
 * Does nothing if c_node is 0.  Otherwise it evaluates c_tree_top and
 * only_elements once into the block-local constants _lx__tree_top and
 * _lx__only_elements, positions c_node on the first node to visit and
 * opens the "while (c_node != 0)" loop whose body is the user code.
 *
 * Positioning: if the start node does not match the filter it is skipped
 * (yielding nothing when it is the tree top itself, otherwise moving on to
 * its next matching sibling); if it matches but 'inclusive' is false, one
 * traversal step is taken to move past it.
 *
 * The expansion deliberately leaves three scopes open; they are closed by
 * _LX__END_FOR_EACH_FROM(), which also relies on the two local constants.
 */
#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \
|
||||
{ \
|
||||
if (c_node != 0) { \
|
||||
const xmlNode* _lx__tree_top = (c_tree_top); \
|
||||
const int _lx__only_elements = (only_elements); \
|
||||
/* make sure we start at an element */ \
|
||||
if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \
|
||||
/* we skip the node, so 'inclusive' is irrelevant */ \
|
||||
if (c_node == _lx__tree_top) \
|
||||
c_node = 0; /* nothing to traverse */ \
|
||||
else { \
|
||||
c_node = c_node->next; \
|
||||
_LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \
|
||||
} \
|
||||
} else if (! (inclusive)) { \
|
||||
/* skip the first node */ \
|
||||
_LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
|
||||
} \
|
||||
\
|
||||
/* now run the user code on the elements we find */ \
|
||||
while (c_node != 0) { \
|
||||
/* here goes the code to be run for each element */
|
||||
|
||||
/* Closing half of the tree walker: advances c_node by one traversal step
 * at the end of each loop iteration, then closes the while loop and the
 * two enclosing scopes opened by _LX__BEGIN_FOR_EACH_FROM().  It uses the
 * _lx__tree_top and _lx__only_elements locals declared there, so it is
 * only valid after a matching BEGIN with the same c_node variable. */
#define _LX__END_FOR_EACH_FROM(c_node) \
|
||||
_LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
/* Start a walk that only stops on nodes satisfying _isElement()
 * (only_elements = 1).  Must be paired with END_FOR_EACH_ELEMENT_FROM(). */
#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \
|
||||
_LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1)
|
||||
|
||||
/* Close a walk opened with BEGIN_FOR_EACH_ELEMENT_FROM(); c_node must be
 * the same variable that was passed to the BEGIN macro. */
#define END_FOR_EACH_ELEMENT_FROM(c_node) \
|
||||
_LX__END_FOR_EACH_FROM(c_node)
|
||||
|
||||
/* Start a walk without node type filtering (only_elements = 0).
 * Must be paired with END_FOR_EACH_FROM(). */
#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \
|
||||
_LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0)
|
||||
|
||||
/* Close a walk opened with BEGIN_FOR_EACH_FROM(); c_node must be the same
 * variable that was passed to the BEGIN macro. */
#define END_FOR_EACH_FROM(c_node) \
|
||||
_LX__END_FOR_EACH_FROM(c_node)
|
||||
|
||||
|
||||
#endif /* HAS_ETREE_DEFS_H */
|
||||
234
lib/python3.5/site-packages/lxml/includes/etreepublic.pxd
Normal file
234
lib/python3.5/site-packages/lxml/includes/etreepublic.pxd
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
# public Cython/C interface to lxml.etree
|
||||
|
||||
from lxml.includes cimport tree
|
||||
from lxml.includes.tree cimport const_xmlChar
|
||||
|
||||
cdef extern from "lxml-version.h":
|
||||
cdef char* LXML_VERSION_STRING
|
||||
|
||||
cdef extern from "etree_defs.h":
|
||||
# test if c_node is considered an Element (i.e. Element, Comment, etc.)
|
||||
cdef bint _isElement(tree.xmlNode* c_node) nogil
|
||||
|
||||
# return the namespace URI of the node or NULL
|
||||
cdef const_xmlChar* _getNs(tree.xmlNode* node) nogil
|
||||
|
||||
# pair of macros for tree traversal
|
||||
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top,
|
||||
tree.xmlNode* start_node,
|
||||
int start_node_inclusive) nogil
|
||||
cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) nogil
|
||||
|
||||
cdef extern from "lxml.etree_api.h":
|
||||
|
||||
# first function to call!
|
||||
cdef int import_lxml__etree() except -1
|
||||
|
||||
##########################################################################
|
||||
# public ElementTree API classes
|
||||
|
||||
cdef class lxml.etree._Document [ object LxmlDocument ]:
|
||||
cdef tree.xmlDoc* _c_doc
|
||||
|
||||
cdef class lxml.etree._Element [ object LxmlElement ]:
|
||||
cdef _Document _doc
|
||||
cdef tree.xmlNode* _c_node
|
||||
|
||||
cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]:
|
||||
pass
|
||||
|
||||
cdef class lxml.etree._ElementTree [ object LxmlElementTree ]:
|
||||
cdef _Document _doc
|
||||
cdef _Element _context_node
|
||||
|
||||
cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]:
|
||||
cdef object (*_lookup_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \
|
||||
[ object LxmlFallbackElementClassLookup ]:
|
||||
cdef ElementClassLookup fallback
|
||||
cdef object (*_fallback_function)(object, _Document, tree.xmlNode*)
|
||||
|
||||
##########################################################################
|
||||
# creating Element objects
|
||||
|
||||
# create an Element for a C-node in the Document
|
||||
cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
# create an ElementTree for an Element
|
||||
cdef _ElementTree elementTreeFactory(_Element context_node)
|
||||
|
||||
# create an ElementTree subclass for an Element
|
||||
cdef _ElementTree newElementTree(_Element context_node, object subclass)
|
||||
|
||||
# create a new Element for an existing or new document (doc = None)
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeElement(tag, _Document doc, parser,
|
||||
text, tail, attrib, nsmap)
|
||||
|
||||
# create a new SubElement for an existing parent
|
||||
# builds Python object after setting text, tail, namespaces and attributes
|
||||
cdef _Element makeSubElement(_Element parent, tag, text, tail,
|
||||
attrib, nsmap)
|
||||
|
||||
# deep copy a node to include it in the Document
|
||||
cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root)
|
||||
|
||||
# set the internal lookup function for Element/Comment/PI classes
|
||||
# use setElementClassLookupFunction(NULL, None) to reset it
|
||||
# note that the lookup function *must always* return an _Element subclass!
|
||||
cdef void setElementClassLookupFunction(
|
||||
object (*function)(object, _Document, tree.xmlNode*), object state)
|
||||
|
||||
# lookup function that always returns the default Element class
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupDefaultElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# lookup function for namespace/tag specific Element classes
|
||||
# note that the first argument is expected to be None!
|
||||
cdef object lookupNamespaceElementClass(_1, _Document _2,
|
||||
tree.xmlNode* c_node)
|
||||
|
||||
# call the fallback lookup function of a FallbackElementClassLookup
|
||||
cdef object callLookupFallback(FallbackElementClassLookup lookup,
|
||||
_Document doc, tree.xmlNode* c_node)
|
||||
|
||||
##########################################################################
|
||||
# XML attribute access
|
||||
|
||||
# return an attribute value for a C attribute on a C element node
|
||||
cdef object attributeValue(tree.xmlNode* c_element,
|
||||
tree.xmlAttr* c_attrib_node)
|
||||
|
||||
# return the value of the attribute with 'ns' and 'name' (or None)
|
||||
cdef object attributeValueFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_ns, const_xmlChar* c_name)
|
||||
|
||||
# return the value of attribute "{ns}name", or the default value
|
||||
cdef object getAttributeValue(_Element element, key, default)
|
||||
|
||||
# return an iterator over attribute names (1), values (2) or items (3)
|
||||
# attributes must not be removed during iteration!
|
||||
cdef object iterattributes(_Element element, int keysvalues)
|
||||
|
||||
# return the list of all attribute names (1), values (2) or items (3)
|
||||
cdef list collectAttributes(tree.xmlNode* c_element, int keysvalues)
|
||||
|
||||
# set an attribute value on an element
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int setAttributeValue(_Element element, key, value) except -1
|
||||
|
||||
# delete an attribute
|
||||
# on failure, sets an exception and returns -1
|
||||
cdef int delAttribute(_Element element, key) except -1
|
||||
|
||||
# delete an attribute based on name and namespace URI
|
||||
# returns -1 if the attribute was not found (no exception)
|
||||
cdef int delAttributeFromNsName(tree.xmlNode* c_element,
|
||||
const_xmlChar* c_href, const_xmlChar* c_name)
|
||||
|
||||
##########################################################################
|
||||
# XML node helper functions
|
||||
|
||||
# check if the element has at least one child
|
||||
cdef bint hasChild(tree.xmlNode* c_node) nogil
|
||||
|
||||
# find child element number 'index' (supports negative indexes)
|
||||
cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# find child element number 'index' starting at first one
|
||||
cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# find child element number 'index' starting at last one
|
||||
cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node,
|
||||
Py_ssize_t index) nogil
|
||||
|
||||
# return next/previous sibling element of the node
|
||||
cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil
|
||||
cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil
|
||||
|
||||
##########################################################################
|
||||
# iterators (DEPRECATED API, don't use in new code!)
|
||||
|
||||
cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]:
|
||||
cdef char* _href
|
||||
cdef char* _name
|
||||
|
||||
# store "{ns}tag" (or None) filter for this matcher or element iterator
|
||||
# ** unless _href *and* _name are set up 'by hand', this function *must*
|
||||
# ** be called when subclassing the iterator below!
|
||||
cdef void initTagMatch(_ElementTagMatcher matcher, tag)
|
||||
|
||||
cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [
|
||||
object LxmlElementIterator ]:
|
||||
cdef _Element _node
|
||||
cdef tree.xmlNode* (*_next_element)(tree.xmlNode*)
|
||||
|
||||
# store the initial node of the iterator if it matches the required tag
|
||||
# or its next matching sibling if not
|
||||
cdef void iteratorStoreNext(_ElementIterator iterator, _Element node)
|
||||
|
||||
##########################################################################
|
||||
# other helper functions
|
||||
|
||||
# check if a C node matches a tag name and namespace
|
||||
# (NULL allowed for each => always matches)
|
||||
cdef int tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name)
|
||||
|
||||
# convert a UTF-8 char* to a Python string or unicode string
|
||||
cdef object pyunicode(const_xmlChar* s)
|
||||
|
||||
# convert the string to UTF-8 using the normal lxml.etree semantics
|
||||
cdef bytes utf8(object s)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return None as URI for '{}tag'
|
||||
cdef tuple getNsTag(object tag)
|
||||
|
||||
# split a tag into a (URI, name) tuple, return b'' as URI for '{}tag'
|
||||
cdef tuple getNsTagWithEmptyNs(object tag)
|
||||
|
||||
# get the "{ns}tag" string for a C node
|
||||
cdef object namespacedName(tree.xmlNode* c_node)
|
||||
|
||||
# get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL)
|
||||
cdef object namespacedNameFromNsName(const_xmlChar* c_ns, const_xmlChar* c_tag)
|
||||
|
||||
# check if the node has a text value (which may be '')
|
||||
cdef bint hasText(tree.xmlNode* c_node) nogil
|
||||
|
||||
# check if the node has a tail value (which may be '')
|
||||
cdef bint hasTail(tree.xmlNode* c_node) nogil
|
||||
|
||||
# get the text content of an element (or None)
|
||||
cdef object textOf(tree.xmlNode* c_node)
|
||||
|
||||
# get the tail content of an element (or None)
|
||||
cdef object tailOf(tree.xmlNode* c_node)
|
||||
|
||||
# set the text value of an element
|
||||
cdef int setNodeText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# set the tail text value of an element
|
||||
cdef int setTailText(tree.xmlNode* c_node, text) except -1
|
||||
|
||||
# append an element to the children of a parent element
|
||||
# deprecated: don't use, does not propagate exceptions!
|
||||
# use appendChildToElement() instead
|
||||
cdef void appendChild(_Element parent, _Element child)
|
||||
|
||||
# added in lxml 3.3 as a safe replacement for appendChild()
|
||||
# return -1 for exception, 0 for ok
|
||||
cdef int appendChildToElement(_Element parent, _Element child) except -1
|
||||
|
||||
# recursively look up a namespace in the element or its ancestors, or create it
|
||||
cdef tree.xmlNs* findOrBuildNodeNsPrefix(
|
||||
_Document doc, tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix)
|
||||
|
||||
# find the Document of an Element, ElementTree or Document (itself!)
|
||||
cdef _Document documentOrRaise(object input)
|
||||
|
||||
# find the root Element of an Element (itself!), ElementTree or Document
|
||||
cdef _Element rootNodeOrRaise(object input)
|
||||
56
lib/python3.5/site-packages/lxml/includes/htmlparser.pxd
Normal file
56
lib/python3.5/site-packages/lxml/includes/htmlparser.pxd
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
|
||||
|
||||
cdef extern from "libxml/HTMLparser.h":
|
||||
ctypedef enum htmlParserOption:
|
||||
HTML_PARSE_NOERROR # suppress error reports
|
||||
HTML_PARSE_NOWARNING # suppress warning reports
|
||||
HTML_PARSE_PEDANTIC # pedantic error reporting
|
||||
HTML_PARSE_NOBLANKS # remove blank nodes
|
||||
HTML_PARSE_NONET # Forbid network access
|
||||
# libxml2 2.6.21+ only:
|
||||
HTML_PARSE_RECOVER # Relaxed parsing
|
||||
HTML_PARSE_COMPACT # compact small text nodes
|
||||
# libxml2 2.7.7+ only:
|
||||
HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements
|
||||
# libxml2 2.7.8+ only:
|
||||
HTML_PARSE_NODEFDTD # do not default a doctype if not found
|
||||
# libxml2 2.8.0+ only:
|
||||
XML_PARSE_IGNORE_ENC # ignore internal document encoding hint
|
||||
|
||||
xmlSAXHandlerV1 htmlDefaultSAXHandler
|
||||
|
||||
cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
|
||||
char* buffer, int size) nogil
|
||||
cdef xmlParserCtxt* htmlCreateFileParserCtxt(
|
||||
char* filename, char* encoding) nogil
|
||||
cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
void* user_data,
|
||||
char* chunk, int size,
|
||||
char* filename, int enc) nogil
|
||||
cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
|
||||
cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
|
||||
cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
|
||||
cdef int htmlParseChunk(xmlParserCtxt* ctxt,
|
||||
char* chunk, int size, int terminate) nogil
|
||||
|
||||
cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
|
||||
char* buffer, char* URL, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void* ioctx,
|
||||
char* URL, const_char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
|
||||
char* buffer, int size,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
3
lib/python3.5/site-packages/lxml/includes/lxml-version.h
Normal file
3
lib/python3.5/site-packages/lxml/includes/lxml-version.h
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
#ifndef LXML_VERSION_STRING
|
||||
#define LXML_VERSION_STRING "3.5.0"
|
||||
#endif
|
||||
64
lib/python3.5/site-packages/lxml/includes/relaxng.pxd
Normal file
64
lib/python3.5/site-packages/lxml/includes/relaxng.pxd
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
|
||||
|
||||
cdef extern from "libxml/relaxng.h":
|
||||
ctypedef struct xmlRelaxNG
|
||||
ctypedef struct xmlRelaxNGParserCtxt
|
||||
|
||||
ctypedef struct xmlRelaxNGValidCtxt
|
||||
|
||||
ctypedef enum xmlRelaxNGValidErr:
|
||||
XML_RELAXNG_OK = 0
|
||||
XML_RELAXNG_ERR_MEMORY = 1
|
||||
XML_RELAXNG_ERR_TYPE = 2
|
||||
XML_RELAXNG_ERR_TYPEVAL = 3
|
||||
XML_RELAXNG_ERR_DUPID = 4
|
||||
XML_RELAXNG_ERR_TYPECMP = 5
|
||||
XML_RELAXNG_ERR_NOSTATE = 6
|
||||
XML_RELAXNG_ERR_NODEFINE = 7
|
||||
XML_RELAXNG_ERR_LISTEXTRA = 8
|
||||
XML_RELAXNG_ERR_LISTEMPTY = 9
|
||||
XML_RELAXNG_ERR_INTERNODATA = 10
|
||||
XML_RELAXNG_ERR_INTERSEQ = 11
|
||||
XML_RELAXNG_ERR_INTEREXTRA = 12
|
||||
XML_RELAXNG_ERR_ELEMNAME = 13
|
||||
XML_RELAXNG_ERR_ATTRNAME = 14
|
||||
XML_RELAXNG_ERR_ELEMNONS = 15
|
||||
XML_RELAXNG_ERR_ATTRNONS = 16
|
||||
XML_RELAXNG_ERR_ELEMWRONGNS = 17
|
||||
XML_RELAXNG_ERR_ATTRWRONGNS = 18
|
||||
XML_RELAXNG_ERR_ELEMEXTRANS = 19
|
||||
XML_RELAXNG_ERR_ATTREXTRANS = 20
|
||||
XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
|
||||
XML_RELAXNG_ERR_NOELEM = 22
|
||||
XML_RELAXNG_ERR_NOTELEM = 23
|
||||
XML_RELAXNG_ERR_ATTRVALID = 24
|
||||
XML_RELAXNG_ERR_CONTENTVALID = 25
|
||||
XML_RELAXNG_ERR_EXTRACONTENT = 26
|
||||
XML_RELAXNG_ERR_INVALIDATTR = 27
|
||||
XML_RELAXNG_ERR_DATAELEM = 28
|
||||
XML_RELAXNG_ERR_VALELEM = 29
|
||||
XML_RELAXNG_ERR_LISTELEM = 30
|
||||
XML_RELAXNG_ERR_DATATYPE = 31
|
||||
XML_RELAXNG_ERR_VALUE = 32
|
||||
XML_RELAXNG_ERR_LIST = 33
|
||||
XML_RELAXNG_ERR_NOGRAMMAR = 34
|
||||
XML_RELAXNG_ERR_EXTRADATA = 35
|
||||
XML_RELAXNG_ERR_LACKDATA = 36
|
||||
XML_RELAXNG_ERR_INTERNAL = 37
|
||||
XML_RELAXNG_ERR_ELEMWRONG = 38
|
||||
XML_RELAXNG_ERR_TEXTWRONG = 39
|
||||
|
||||
cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) nogil
|
||||
cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) nogil
|
||||
cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) nogil
|
||||
cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) nogil
|
||||
cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) nogil
|
||||
cdef void xmlRelaxNGFree(xmlRelaxNG* schema) nogil
|
||||
cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) nogil
|
||||
cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) nogil
|
||||
|
||||
cdef void xmlRelaxNGSetValidStructuredErrors(
|
||||
xmlRelaxNGValidCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
|
||||
cdef void xmlRelaxNGSetParserStructuredErrors(
|
||||
xmlRelaxNGParserCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
|
||||
34
lib/python3.5/site-packages/lxml/includes/schematron.pxd
Normal file
34
lib/python3.5/site-packages/lxml/includes/schematron.pxd
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
from lxml.includes cimport xmlerror
|
||||
from lxml.includes.tree cimport xmlDoc
|
||||
|
||||
cdef extern from "libxml/schematron.h":
|
||||
ctypedef struct xmlSchematron
|
||||
ctypedef struct xmlSchematronParserCtxt
|
||||
ctypedef struct xmlSchematronValidCtxt
|
||||
|
||||
ctypedef enum xmlSchematronValidOptions:
|
||||
XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report
|
||||
XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report
|
||||
XML_SCHEMATRON_OUT_XML = 4 # output SVRL
|
||||
XML_SCHEMATRON_OUT_ERROR = 8 # output via xmlStructuredErrorFunc
|
||||
XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor
|
||||
XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
|
||||
XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
|
||||
|
||||
cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(
|
||||
xmlDoc* doc) nogil
|
||||
cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(
|
||||
char* filename) nogil
|
||||
cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(
|
||||
xmlSchematron* schema, int options) nogil
|
||||
|
||||
cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) nogil
|
||||
cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
|
||||
xmlDoc* instance) nogil
|
||||
|
||||
cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) nogil
|
||||
cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) nogil
|
||||
cdef void xmlSchematronFree(xmlSchematron* schema) nogil
|
||||
cdef void xmlSchematronSetValidStructuredErrors(
|
||||
xmlSchematronValidCtxt* ctxt,
|
||||
xmlerror.xmlStructuredErrorFunc error_func, void *data)
|
||||
474
lib/python3.5/site-packages/lxml/includes/tree.pxd
Normal file
474
lib/python3.5/site-packages/lxml/includes/tree.pxd
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
from libc cimport stdio
|
||||
from libc.string cimport const_char, const_uchar
|
||||
|
||||
cdef extern from "lxml-version.h":
|
||||
# deprecated declaration, use etreepublic.pxd instead
|
||||
cdef char* LXML_VERSION_STRING
|
||||
|
||||
cdef extern from "libxml/xmlversion.h":
|
||||
cdef const_char* xmlParserVersion
|
||||
cdef int LIBXML_VERSION
|
||||
|
||||
cdef extern from "libxml/xmlstring.h":
|
||||
ctypedef unsigned char xmlChar
|
||||
ctypedef const xmlChar const_xmlChar "const xmlChar"
|
||||
cdef int xmlStrlen(const_xmlChar* str) nogil
|
||||
cdef xmlChar* xmlStrdup(const_xmlChar* cur) nogil
|
||||
cdef int xmlStrncmp(const_xmlChar* str1, const_xmlChar* str2, int length) nogil
|
||||
cdef int xmlStrcmp(const_xmlChar* str1, const_xmlChar* str2) nogil
|
||||
cdef int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) nogil
|
||||
cdef const_xmlChar* xmlStrstr(const_xmlChar* str1, const_xmlChar* str2) nogil
|
||||
cdef const_xmlChar* xmlStrchr(const_xmlChar* str1, xmlChar ch) nogil
|
||||
cdef const_xmlChar* _xcstr "(const xmlChar*)PyBytes_AS_STRING" (object s)
|
||||
|
||||
cdef extern from "libxml/encoding.h":
|
||||
ctypedef enum xmlCharEncoding:
|
||||
XML_CHAR_ENCODING_ERROR = -1 # No char encoding detected
|
||||
XML_CHAR_ENCODING_NONE = 0 # No char encoding detected
|
||||
XML_CHAR_ENCODING_UTF8 = 1 # UTF-8
|
||||
XML_CHAR_ENCODING_UTF16LE = 2 # UTF-16 little endian
|
||||
XML_CHAR_ENCODING_UTF16BE = 3 # UTF-16 big endian
|
||||
XML_CHAR_ENCODING_UCS4LE = 4 # UCS-4 little endian
|
||||
XML_CHAR_ENCODING_UCS4BE = 5 # UCS-4 big endian
|
||||
XML_CHAR_ENCODING_EBCDIC = 6 # EBCDIC uh!
|
||||
XML_CHAR_ENCODING_UCS4_2143 = 7 # UCS-4 unusual ordering
|
||||
XML_CHAR_ENCODING_UCS4_3412 = 8 # UCS-4 unusual ordering
|
||||
XML_CHAR_ENCODING_UCS2 = 9 # UCS-2
|
||||
XML_CHAR_ENCODING_8859_1 = 10 # ISO-8859-1 ISO Latin 1
|
||||
XML_CHAR_ENCODING_8859_2 = 11 # ISO-8859-2 ISO Latin 2
|
||||
XML_CHAR_ENCODING_8859_3 = 12 # ISO-8859-3
|
||||
XML_CHAR_ENCODING_8859_4 = 13 # ISO-8859-4
|
||||
XML_CHAR_ENCODING_8859_5 = 14 # ISO-8859-5
|
||||
XML_CHAR_ENCODING_8859_6 = 15 # ISO-8859-6
|
||||
XML_CHAR_ENCODING_8859_7 = 16 # ISO-8859-7
|
||||
XML_CHAR_ENCODING_8859_8 = 17 # ISO-8859-8
|
||||
XML_CHAR_ENCODING_8859_9 = 18 # ISO-8859-9
|
||||
XML_CHAR_ENCODING_2022_JP = 19 # ISO-2022-JP
|
||||
XML_CHAR_ENCODING_SHIFT_JIS = 20 # Shift_JIS
|
||||
XML_CHAR_ENCODING_EUC_JP = 21 # EUC-JP
|
||||
XML_CHAR_ENCODING_ASCII = 22 # pure ASCII
|
||||
|
||||
ctypedef struct xmlCharEncodingHandler
|
||||
cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) nogil
|
||||
cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler(
|
||||
xmlCharEncoding enc) nogil
|
||||
cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) nogil
|
||||
cdef xmlCharEncoding xmlDetectCharEncoding(const_xmlChar* text, int len) nogil
|
||||
cdef const_char* xmlGetCharEncodingName(xmlCharEncoding enc) nogil
|
||||
cdef xmlCharEncoding xmlParseCharEncoding(char* name) nogil
|
||||
ctypedef int (*xmlCharEncodingOutputFunc)(
|
||||
unsigned char *out_buf, int *outlen, const_uchar *in_buf, int *inlen)
|
||||
|
||||
cdef extern from "libxml/chvalid.h":
|
||||
cdef int xmlIsChar_ch(char c) nogil
|
||||
|
||||
cdef extern from "libxml/hash.h":
|
||||
ctypedef struct xmlHashTable
|
||||
ctypedef void (*xmlHashScanner)(void* payload, void* data, const_xmlChar* name) # may require GIL!
|
||||
void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil
|
||||
void* xmlHashLookup(xmlHashTable* table, const_xmlChar* name) nogil
|
||||
ctypedef void (*xmlHashDeallocator)(void *payload, xmlChar *name)
|
||||
cdef xmlHashTable* xmlHashCreate(int size)
|
||||
cdef xmlHashTable* xmlHashCreateDict(int size, xmlDict *dict)
|
||||
cdef int xmlHashSize(xmlHashTable* table)
|
||||
cdef void xmlHashFree(xmlHashTable* table, xmlHashDeallocator f)
|
||||
|
||||
cdef extern from *: # actually "libxml/dict.h"
|
||||
# libxml/dict.h appears to be broken to include in C
|
||||
ctypedef struct xmlDict
|
||||
cdef const_xmlChar* xmlDictLookup(xmlDict* dict, const_xmlChar* name, int len) nogil
|
||||
cdef const_xmlChar* xmlDictExists(xmlDict* dict, const_xmlChar* name, int len) nogil
|
||||
cdef int xmlDictOwns(xmlDict* dict, const_xmlChar* name) nogil
|
||||
cdef size_t xmlDictSize(xmlDict* dict) nogil
|
||||
|
||||
cdef extern from "libxml/tree.h":
|
||||
ctypedef struct xmlDoc
|
||||
ctypedef struct xmlAttr
|
||||
ctypedef struct xmlNotationTable
|
||||
|
||||
ctypedef enum xmlElementType:
|
||||
XML_ELEMENT_NODE= 1
|
||||
XML_ATTRIBUTE_NODE= 2
|
||||
XML_TEXT_NODE= 3
|
||||
XML_CDATA_SECTION_NODE= 4
|
||||
XML_ENTITY_REF_NODE= 5
|
||||
XML_ENTITY_NODE= 6
|
||||
XML_PI_NODE= 7
|
||||
XML_COMMENT_NODE= 8
|
||||
XML_DOCUMENT_NODE= 9
|
||||
XML_DOCUMENT_TYPE_NODE= 10
|
||||
XML_DOCUMENT_FRAG_NODE= 11
|
||||
XML_NOTATION_NODE= 12
|
||||
XML_HTML_DOCUMENT_NODE= 13
|
||||
XML_DTD_NODE= 14
|
||||
XML_ELEMENT_DECL= 15
|
||||
XML_ATTRIBUTE_DECL= 16
|
||||
XML_ENTITY_DECL= 17
|
||||
XML_NAMESPACE_DECL= 18
|
||||
XML_XINCLUDE_START= 19
|
||||
XML_XINCLUDE_END= 20
|
||||
|
||||
ctypedef enum xmlElementTypeVal:
|
||||
XML_ELEMENT_TYPE_UNDEFINED= 0
|
||||
XML_ELEMENT_TYPE_EMPTY= 1
|
||||
XML_ELEMENT_TYPE_ANY= 2
|
||||
XML_ELEMENT_TYPE_MIXED= 3
|
||||
XML_ELEMENT_TYPE_ELEMENT= 4
|
||||
|
||||
ctypedef enum xmlElementContentType:
|
||||
XML_ELEMENT_CONTENT_PCDATA= 1
|
||||
XML_ELEMENT_CONTENT_ELEMENT= 2
|
||||
XML_ELEMENT_CONTENT_SEQ= 3
|
||||
XML_ELEMENT_CONTENT_OR= 4
|
||||
|
||||
ctypedef enum xmlElementContentOccur:
|
||||
XML_ELEMENT_CONTENT_ONCE= 1
|
||||
XML_ELEMENT_CONTENT_OPT= 2
|
||||
XML_ELEMENT_CONTENT_MULT= 3
|
||||
XML_ELEMENT_CONTENT_PLUS= 4
|
||||
|
||||
ctypedef enum xmlAttributeType:
|
||||
XML_ATTRIBUTE_CDATA = 1
|
||||
XML_ATTRIBUTE_ID= 2
|
||||
XML_ATTRIBUTE_IDREF= 3
|
||||
XML_ATTRIBUTE_IDREFS= 4
|
||||
XML_ATTRIBUTE_ENTITY= 5
|
||||
XML_ATTRIBUTE_ENTITIES= 6
|
||||
XML_ATTRIBUTE_NMTOKEN= 7
|
||||
XML_ATTRIBUTE_NMTOKENS= 8
|
||||
XML_ATTRIBUTE_ENUMERATION= 9
|
||||
XML_ATTRIBUTE_NOTATION= 10
|
||||
|
||||
ctypedef enum xmlAttributeDefault:
|
||||
XML_ATTRIBUTE_NONE= 1
|
||||
XML_ATTRIBUTE_REQUIRED= 2
|
||||
XML_ATTRIBUTE_IMPLIED= 3
|
||||
XML_ATTRIBUTE_FIXED= 4
|
||||
|
||||
ctypedef enum xmlEntityType:
|
||||
XML_INTERNAL_GENERAL_ENTITY= 1
|
||||
XML_EXTERNAL_GENERAL_PARSED_ENTITY= 2
|
||||
XML_EXTERNAL_GENERAL_UNPARSED_ENTITY= 3
|
||||
XML_INTERNAL_PARAMETER_ENTITY= 4
|
||||
XML_EXTERNAL_PARAMETER_ENTITY= 5
|
||||
XML_INTERNAL_PREDEFINED_ENTITY= 6
|
||||
|
||||
ctypedef struct xmlNs:
|
||||
const_xmlChar* href
|
||||
const_xmlChar* prefix
|
||||
xmlNs* next
|
||||
|
||||
# Generic tree node. Only the fields listed here are visible to Cython code.
# children/last, parent and next/prev link the node into the tree; `doc`
# points at the owning document; `ns` and `nsDef` both point at xmlNs records.
# NOTE(review): `line` is an unsigned short here, while xmlGetLineNo() in this
# file returns long — presumably the field can saturate; confirm.
ctypedef struct xmlNode:
|
||||
void* _private
|
||||
xmlElementType type
|
||||
const_xmlChar* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlNode* parent
|
||||
xmlNode* next
|
||||
xmlNode* prev
|
||||
xmlDoc* doc
|
||||
xmlChar* content
|
||||
xmlAttr* properties
|
||||
xmlNs* ns
|
||||
xmlNs* nsDef
|
||||
unsigned short line
|
||||
|
||||
# Node of an element content description: a kind (`type`), an occurrence
# marker (`ocur`), a name/prefix pair, and pointers to two child nodes
# (`c1`, `c2`) and a parent node of the same struct type.
ctypedef struct xmlElementContent:
|
||||
xmlElementContentType type
|
||||
xmlElementContentOccur ocur
|
||||
const_xmlChar *name
|
||||
xmlElementContent *c1
|
||||
xmlElementContent *c2
|
||||
xmlElementContent *parent
|
||||
const_xmlChar *prefix
|
||||
|
||||
# Singly linked list entry holding one name.
# The `tree` field of xmlAttribute points at this struct type.
ctypedef struct xmlEnumeration:
|
||||
xmlEnumeration *next
|
||||
const_xmlChar *name
|
||||
|
||||
# Attribute declaration record. The first nine fields follow the same order as
# in xmlNode, except that `parent` is an xmlDtd*. The rest carry the attribute
# type, default kind, default value, enumeration list and owning element name.
ctypedef struct xmlAttribute:
|
||||
void* _private
|
||||
xmlElementType type
|
||||
const_xmlChar* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlDtd* parent
|
||||
xmlNode* next
|
||||
xmlNode* prev
|
||||
xmlDoc* doc
|
||||
xmlAttribute* nexth
|
||||
xmlAttributeType atype
|
||||
# `def` is a reserved word in Cython, so the field is exposed as `def_`;
# the quoted string is the real C member name.
xmlAttributeDefault def_ "def"
|
||||
const_xmlChar* defaultValue
|
||||
xmlEnumeration* tree
|
||||
const_xmlChar* prefix
|
||||
const_xmlChar* elem
|
||||
|
||||
# Element declaration record. The first nine fields follow the same order as in
# xmlNode. The rest carry the element type value, its content description, its
# attribute declarations, a prefix and an untyped `contModel` pointer.
ctypedef struct xmlElement:
|
||||
void* _private
|
||||
xmlElementType type
|
||||
const_xmlChar* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlNode* parent
|
||||
xmlNode* next
|
||||
xmlNode* prev
|
||||
xmlDoc* doc
|
||||
xmlElementTypeVal etype
|
||||
xmlElementContent* content
|
||||
xmlAttribute* attributes
|
||||
const_xmlChar* prefix
|
||||
void *contModel
|
||||
|
||||
# Entity declaration record. The first nine fields follow the same order as in
# xmlNode, except that `parent` is an xmlDtd*. The rest carry the entity's
# original and processed content, its length, kind, external identifiers and URI.
# NOTE(review): the meaning of `owner` and `checked` is not visible here —
# consult the libxml2 entities documentation before relying on them.
ctypedef struct xmlEntity:
|
||||
void* _private
|
||||
xmlElementType type
|
||||
const_xmlChar* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlDtd* parent
|
||||
xmlNode* next
|
||||
xmlNode* prev
|
||||
xmlDoc* doc
|
||||
xmlChar* orig
|
||||
xmlChar* content
|
||||
int length
|
||||
xmlEntityType etype
|
||||
const_xmlChar* ExternalID
|
||||
const_xmlChar* SystemID
|
||||
xmlEntity* nexte
|
||||
const_xmlChar* URI
|
||||
int owner
|
||||
int checked
|
||||
|
||||
# DTD record: name plus external/system identifiers, five untyped (void*)
# table pointers (notations, entities, pentities, attributes, elements),
# child node links and the owning document.
ctypedef struct xmlDtd:
|
||||
const_xmlChar* name
|
||||
const_xmlChar* ExternalID
|
||||
const_xmlChar* SystemID
|
||||
void* notations
|
||||
void* entities
|
||||
void* pentities
|
||||
void* attributes
|
||||
void* elements
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlDoc* doc
|
||||
|
||||
# Document record. Besides the node-style links (children/last/parent/next/
# prev/doc) it exposes the dictionary, the ID hash table, the standalone flag,
# version/encoding/URL strings and the internal/external DTD subsets.
# Note that `name` is a plain char* here, unlike the const_xmlChar* names of
# the other structs in this file.
ctypedef struct xmlDoc:
|
||||
xmlElementType type
|
||||
char* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlNode* parent
|
||||
xmlNode* next
|
||||
xmlNode* prev
|
||||
xmlDoc* doc
|
||||
xmlDict* dict
|
||||
xmlHashTable* ids
|
||||
int standalone
|
||||
const_xmlChar* version
|
||||
const_xmlChar* encoding
|
||||
const_xmlChar* URL
|
||||
void* _private
|
||||
xmlDtd* intSubset
|
||||
xmlDtd* extSubset
|
||||
|
||||
# Attribute instance attached to a node. Same leading layout as xmlNode, but
# the sibling links `next`/`prev` are typed xmlAttr*; `ns` points at the
# attribute's xmlNs record.
ctypedef struct xmlAttr:
|
||||
void* _private
|
||||
xmlElementType type
|
||||
const_xmlChar* name
|
||||
xmlNode* children
|
||||
xmlNode* last
|
||||
xmlNode* parent
|
||||
xmlAttr* next
|
||||
xmlAttr* prev
|
||||
xmlDoc* doc
|
||||
xmlNs* ns
|
||||
|
||||
# ID record tying a value/name string pair to an attribute (`attr`) and a
# document (`doc`).
ctypedef struct xmlID:
|
||||
const_xmlChar* value
|
||||
const_xmlChar* name
|
||||
xmlAttr* attr
|
||||
xmlDoc* doc
|
||||
|
||||
# Opaque buffer types: declared without fields, so Cython code can only pass
# pointers to them around and must go through the accessor functions.
ctypedef struct xmlBuffer
|
||||
|
||||
ctypedef struct xmlBuf # new in libxml2 2.9
|
||||
|
||||
# Output buffer record exposing two xmlBuf pointers (`buffer`, `conv`) and an
# integer `error` field.
ctypedef struct xmlOutputBuffer:
|
||||
xmlBuf* buffer
|
||||
xmlBuf* conv
|
||||
int error
|
||||
|
||||
# Extern constant declared as a const_xmlChar pointer (a C string).
const_xmlChar* XML_XML_NAMESPACE
|
||||
|
||||
# The xmlFree* family: each takes a single pointer and returns void.
# All are declared `nogil`, so they may be called without holding the GIL.
cdef void xmlFreeDoc(xmlDoc* cur) nogil
|
||||
cdef void xmlFreeDtd(xmlDtd* cur) nogil
|
||||
cdef void xmlFreeNode(xmlNode* cur) nogil
|
||||
cdef void xmlFreeNsList(xmlNs* ns) nogil
|
||||
cdef void xmlFreeNs(xmlNs* ns) nogil
|
||||
cdef void xmlFree(void* buf) nogil
|
||||
|
||||
# Extern declarations for creating nodes/namespaces/documents, linking nodes
# into a tree, reading and writing attributes ("props"), and dumping a
# document to memory or an output buffer (grouping inferred from the names).
# All are declared `nogil`. Declarations that wrap continue their parameter
# list on the following source row.
cdef xmlNode* xmlNewNode(xmlNs* ns, const_xmlChar* name) nogil
|
||||
cdef xmlNode* xmlNewDocText(xmlDoc* doc, const_xmlChar* content) nogil
|
||||
cdef xmlNode* xmlNewDocComment(xmlDoc* doc, const_xmlChar* content) nogil
|
||||
cdef xmlNode* xmlNewDocPI(xmlDoc* doc, const_xmlChar* name, const_xmlChar* content) nogil
|
||||
cdef xmlNode* xmlNewReference(xmlDoc* doc, const_xmlChar* name) nogil
|
||||
cdef xmlNode* xmlNewCDataBlock(xmlDoc* doc, const_xmlChar* text, int len) nogil
|
||||
cdef xmlNs* xmlNewNs(xmlNode* node, const_xmlChar* href, const_xmlChar* prefix) nogil
|
||||
cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) nogil
|
||||
cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) nogil
|
||||
cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) nogil
|
||||
cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) nogil
|
||||
cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns,
|
||||
const_xmlChar* name, const_xmlChar* content) nogil
|
||||
cdef xmlDoc* xmlNewDoc(const_xmlChar* version) nogil
|
||||
cdef xmlAttr* xmlNewProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil
|
||||
cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns,
|
||||
const_xmlChar* name, const_xmlChar* value) nogil
|
||||
cdef xmlChar* xmlGetNoNsProp(xmlNode* node, const_xmlChar* name) nogil
|
||||
cdef xmlChar* xmlGetNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) nogil
|
||||
cdef void xmlSetNs(xmlNode* node, xmlNs* ns) nogil
|
||||
cdef xmlAttr* xmlSetProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) nogil
|
||||
cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns,
|
||||
const_xmlChar* name, const_xmlChar* value) nogil
|
||||
cdef int xmlRemoveProp(xmlAttr* cur) nogil
|
||||
cdef xmlChar* xmlGetNodePath(xmlNode* node) nogil
|
||||
cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) nogil
|
||||
cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size,
|
||||
char* encoding) nogil
|
||||
cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur,
|
||||
char* encoding) nogil
|
||||
|
||||
# Extern declarations for tree maintenance and queries: unlinking nodes, root
# element access, attribute/namespace lookup, content access, node dumping,
# copying, namespace reconciliation, xmlBuffer/xmlBuf accessors, base-URI
# handling and NCName validation (grouping inferred from the names).
# All are declared `nogil`.
cdef void xmlUnlinkNode(xmlNode* cur) nogil
|
||||
cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) nogil
|
||||
cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) nogil
|
||||
cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) nogil
|
||||
cdef xmlAttr* xmlHasProp(xmlNode* node, const_xmlChar* name) nogil
|
||||
cdef xmlAttr* xmlHasNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) nogil
|
||||
cdef xmlChar* xmlNodeGetContent(xmlNode* cur) nogil
|
||||
cdef int xmlNodeBufGetContent(xmlBuffer* buffer, xmlNode* cur) nogil
|
||||
cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, const_xmlChar* prefix) nogil
|
||||
cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, const_xmlChar* href) nogil
|
||||
cdef int xmlIsBlankNode(xmlNode* node) nogil
|
||||
cdef long xmlGetLineNo(xmlNode* node) nogil
|
||||
cdef void xmlElemDump(stdio.FILE* f, xmlDoc* doc, xmlNode* cur) nogil
|
||||
cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf,
|
||||
xmlDoc* doc, xmlNode* cur, int level,
|
||||
int format, const_char* encoding) nogil
|
||||
cdef void xmlNodeSetName(xmlNode* cur, const_xmlChar* name) nogil
|
||||
cdef void xmlNodeSetContent(xmlNode* cur, const_xmlChar* content) nogil
|
||||
cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) nogil
|
||||
cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) nogil
|
||||
cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) nogil
|
||||
cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) nogil
|
||||
cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) nogil
|
||||
cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) nogil
|
||||
cdef xmlBuffer* xmlBufferCreate() nogil
|
||||
cdef void xmlBufferWriteChar(xmlBuffer* buf, char* string) nogil
|
||||
cdef void xmlBufferFree(xmlBuffer* buf) nogil
|
||||
cdef const_xmlChar* xmlBufferContent(xmlBuffer* buf) nogil
|
||||
cdef int xmlBufferLength(xmlBuffer* buf) nogil
|
||||
cdef const_xmlChar* xmlBufContent(xmlBuf* buf) nogil # new in libxml2 2.9
|
||||
cdef size_t xmlBufUse(xmlBuf* buf) nogil # new in libxml2 2.9
|
||||
cdef int xmlKeepBlanksDefault(int val) nogil
|
||||
cdef xmlChar* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) nogil
|
||||
cdef xmlDtd* xmlCreateIntSubset(xmlDoc* doc, const_xmlChar* name,
|
||||
const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||
cdef void xmlNodeSetBase(xmlNode* node, const_xmlChar* uri) nogil
|
||||
cdef int xmlValidateNCName(const_xmlChar* value, int space) nogil
|
||||
|
||||
# URI helper taken from libxml/uri.h: takes an href and a base string and
# returns a string pointer.
# NOTE(review): the result is declared const_xmlChar* — confirm who owns and
# frees the returned string.
cdef extern from "libxml/uri.h":
|
||||
cdef const_xmlChar* xmlBuildURI(const_xmlChar* href, const_xmlChar* base) nogil
|
||||
|
||||
# HTML-specific tree functions from libxml/HTMLtree.h: one that dumps a node
# to an output buffer and one that creates a new document. Both are `nogil`.
cdef extern from "libxml/HTMLtree.h":
|
||||
cdef void htmlNodeDumpFormatOutput(xmlOutputBuffer* buf,
|
||||
xmlDoc* doc, xmlNode* cur,
|
||||
char* encoding, int format) nogil
|
||||
cdef xmlDoc* htmlNewDoc(const_xmlChar* uri, const_xmlChar* externalID) nogil
|
||||
|
||||
# Functions from libxml/valid.h: ID lookup on a document (returns an
# xmlAttr*), dumping a notation table into an xmlBuffer, and validating a
# name value. All are `nogil`.
cdef extern from "libxml/valid.h":
|
||||
cdef xmlAttr* xmlGetID(xmlDoc* doc, const_xmlChar* ID) nogil
|
||||
cdef void xmlDumpNotationTable(xmlBuffer* buffer,
|
||||
xmlNotationTable* table) nogil
|
||||
cdef int xmlValidateNameValue(const_xmlChar* value) nogil
|
||||
|
||||
# I/O layer from libxml/xmlIO.h: functions that write to, flush and close an
# xmlOutputBuffer, the callback signatures for custom input/output, and the
# constructors that create an xmlOutputBuffer from an encoder, a pair of
# callbacks, a FILE* or a filename. The functions are `nogil`; the callback
# typedefs carry no `nogil` qualifier.
cdef extern from "libxml/xmlIO.h":
|
||||
cdef int xmlOutputBufferWrite(xmlOutputBuffer* out,
|
||||
int len, const_char* str) nogil
|
||||
cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, const_char* str) nogil
|
||||
cdef int xmlOutputBufferWriteEscape(xmlOutputBuffer* out,
|
||||
const_xmlChar* str,
|
||||
xmlCharEncodingOutputFunc escapefunc) nogil
|
||||
cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) nogil
|
||||
cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil
|
||||
|
||||
# Callback signatures: read/write receive (context, buffer, len), close
# receives only the context; all return int.
ctypedef int (*xmlInputReadCallback)(void* context,
|
||||
char* buffer, int len)
|
||||
ctypedef int (*xmlInputCloseCallback)(void* context)
|
||||
|
||||
ctypedef int (*xmlOutputWriteCallback)(void* context,
|
||||
char* buffer, int len)
|
||||
ctypedef int (*xmlOutputCloseCallback)(void* context)
|
||||
|
||||
cdef xmlOutputBuffer* xmlAllocOutputBuffer(
|
||||
xmlCharEncodingHandler* encoder) nogil
|
||||
cdef xmlOutputBuffer* xmlOutputBufferCreateIO(
|
||||
xmlOutputWriteCallback iowrite,
|
||||
xmlOutputCloseCallback ioclose,
|
||||
void * ioctx,
|
||||
xmlCharEncodingHandler* encoder) nogil
|
||||
cdef xmlOutputBuffer* xmlOutputBufferCreateFile(
|
||||
stdio.FILE* file, xmlCharEncodingHandler* encoder) nogil
|
||||
cdef xmlOutputBuffer* xmlOutputBufferCreateFilename(
|
||||
char* URI, xmlCharEncodingHandler* encoder, int compression) nogil
|
||||
|
||||
# Serialisation API from libxml/xmlsave.h: an opaque save context, the
# xmlSaveOption flags, and the functions that create a context, save a
# document or subtree through it, and flush/close it. All functions are `nogil`.
cdef extern from "libxml/xmlsave.h":
|
||||
ctypedef struct xmlSaveCtxt
|
||||
|
||||
# Option values are distinct powers of two, so they can be OR-ed together
# into the `int options` argument of the xmlSaveTo* functions.
ctypedef enum xmlSaveOption:
|
||||
XML_SAVE_FORMAT = 1 # format save output (2.6.17)
|
||||
XML_SAVE_NO_DECL = 2 # drop the xml declaration (2.6.21)
|
||||
XML_SAVE_NO_EMPTY = 4 # no empty tags (2.6.22)
|
||||
XML_SAVE_NO_XHTML = 8 # disable XHTML1 specific rules (2.6.22)
|
||||
XML_SAVE_XHTML = 16 # force XHTML1 specific rules (2.7.2)
|
||||
XML_SAVE_AS_XML = 32 # force XML serialization on HTML doc (2.7.2)
|
||||
XML_SAVE_AS_HTML = 64 # force HTML serialization on XML doc (2.7.2)
|
||||
|
||||
cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlSaveCtxt* xmlSaveToBuffer(xmlBuffer* buffer, char* encoding,
|
||||
int options) nogil # libxml2 2.6.23
|
||||
cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) nogil
|
||||
cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) nogil
|
||||
cdef int xmlSaveClose(xmlSaveCtxt* ctxt) nogil
|
||||
cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) nogil
|
||||
# The escape callbacks are declared as untyped void* pointers here.
cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
|
||||
cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) nogil
|
||||
|
||||
# Setters from libxml/globals.h; each takes an int on/off flag and returns int.
# NOTE(review): presumably the return value is the previous setting — confirm
# against the libxml2 globals documentation.
cdef extern from "libxml/globals.h":
|
||||
cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) nogil
|
||||
cdef int xmlThrDefLineNumbersDefaultValue(int onoff) nogil
|
||||
cdef int xmlThrDefIndentTreeOutput(int onoff) nogil
|
||||
|
||||
# Memory functions from libxml/xmlmemory.h: an allocator, two int-returning
# counters and three functions that write a report to a FILE*.
# `nogil` is applied once on the extern block, so it covers every declaration
# inside instead of being repeated per function.
cdef extern from "libxml/xmlmemory.h" nogil:
|
||||
cdef void* xmlMalloc(size_t size)
|
||||
cdef int xmlMemBlocks()
|
||||
cdef int xmlMemUsed()
|
||||
cdef void xmlMemDisplay(stdio.FILE* file)
|
||||
cdef void xmlMemDisplayLast(stdio.FILE* file, long num_bytes)
|
||||
cdef void xmlMemShow(stdio.FILE* file, int count)
|
||||
|
||||
# Helpers taken from the project header etree_defs.h (not a libxml2 header):
# node-type predicates, a namespace accessor, and BEGIN_/END_ pairs for tree
# traversal.
# NOTE(review): the upper-case BEGIN_*/END_* names look like C macros declared
# as functions so that Cython can emit calls to them — confirm in etree_defs.h.
cdef extern from "etree_defs.h":
|
||||
cdef bint _isElement(xmlNode* node) nogil
|
||||
cdef bint _isElementOrXInclude(xmlNode* node) nogil
|
||||
cdef const_xmlChar* _getNs(xmlNode* node) nogil
|
||||
cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top,
|
||||
xmlNode* start_node,
|
||||
bint inclusive) nogil
|
||||
cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) nogil
|
||||
cdef void BEGIN_FOR_EACH_FROM(xmlNode* tree_top,
|
||||
xmlNode* start_node,
|
||||
bint inclusive) nogil
|
||||
cdef void END_FOR_EACH_FROM(xmlNode* start_node) nogil
|
||||
5
lib/python3.5/site-packages/lxml/includes/uri.pxd
Normal file
5
lib/python3.5/site-packages/lxml/includes/uri.pxd
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# URI parsing API from libxml/uri.h: an opaque xmlURI type, a parser that
# returns an xmlURI* for a C string, and the matching free function.
# NOTE(review): neither function is declared `nogil`, so they cannot be called
# from a nogil section — confirm this is intended.
cdef extern from "libxml/uri.h":
|
||||
ctypedef struct xmlURI
|
||||
|
||||
cdef xmlURI* xmlParseURI(char* str)
|
||||
cdef void xmlFreeURI(xmlURI* uri)
|
||||
22
lib/python3.5/site-packages/lxml/includes/xinclude.pxd
Normal file
22
lib/python3.5/site-packages/lxml/includes/xinclude.pxd
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
from lxml.includes.tree cimport xmlDoc, xmlNode
|
||||
|
||||
# XInclude API from libxml/xinclude.h: an opaque processing context plus
# document-level, subtree-level and context-based entry points, each
# returning int. All are `nogil`.
# NOTE(review): the first parameter of the *Tree* variants is an xmlNode*
# although it is named `doc`.
cdef extern from "libxml/xinclude.h":
|
||||
|
||||
ctypedef struct xmlXIncludeCtxt
|
||||
|
||||
cdef int xmlXIncludeProcess(xmlDoc* doc) nogil
|
||||
cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) nogil
|
||||
cdef int xmlXIncludeProcessTree(xmlNode* doc) nogil
|
||||
cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) nogil
|
||||
|
||||
# libxml2 >= 2.7.4
|
||||
cdef int xmlXIncludeProcessTreeFlagsData(
|
||||
xmlNode* doc, int parser_opts, void* data) nogil
|
||||
|
||||
cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) nogil
|
||||
cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) nogil
|
||||
cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) nogil
|
||||
|
||||
# libxml2 >= 2.6.27
|
||||
cdef int xmlXIncludeProcessFlagsData(
|
||||
xmlDoc* doc, int flags, void* data) nogil
|
||||
850
lib/python3.5/site-packages/lxml/includes/xmlerror.pxd
Normal file
850
lib/python3.5/site-packages/lxml/includes/xmlerror.pxd
Normal file
|
|
@ -0,0 +1,850 @@
|
|||
|
||||
# --- BEGIN: GENERATED CONSTANTS ---
|
||||
|
||||
# This section is generated by the script 'update-error-constants.py'.
|
||||
|
||||
cdef extern from "libxml/xmlerror.h":
|
||||
# Error severity levels, ordered from no error (0) to fatal (3).
ctypedef enum xmlErrorLevel:
|
||||
XML_ERR_NONE = 0
|
||||
XML_ERR_WARNING = 1 # A simple warning
|
||||
XML_ERR_ERROR = 2 # A recoverable error
|
||||
XML_ERR_FATAL = 3 # A fatal error
|
||||
|
||||
# Error domains: identifies which library module an error came from.
# Values are consecutive from 0 (none) to 30 (URI module).
ctypedef enum xmlErrorDomain:
|
||||
XML_FROM_NONE = 0
|
||||
XML_FROM_PARSER = 1 # The XML parser
|
||||
XML_FROM_TREE = 2 # The tree module
|
||||
XML_FROM_NAMESPACE = 3 # The XML Namespace module
|
||||
XML_FROM_DTD = 4 # The XML DTD validation with parser context
|
||||
XML_FROM_HTML = 5 # The HTML parser
|
||||
XML_FROM_MEMORY = 6 # The memory allocator
|
||||
XML_FROM_OUTPUT = 7 # The serialization code
|
||||
XML_FROM_IO = 8 # The Input/Output stack
|
||||
XML_FROM_FTP = 9 # The FTP module
|
||||
XML_FROM_HTTP = 10 # The HTTP module
|
||||
XML_FROM_XINCLUDE = 11 # The XInclude processing
|
||||
XML_FROM_XPATH = 12 # The XPath module
|
||||
XML_FROM_XPOINTER = 13 # The XPointer module
|
||||
XML_FROM_REGEXP = 14 # The regular expressions module
|
||||
XML_FROM_DATATYPE = 15 # The W3C XML Schemas Datatype module
|
||||
XML_FROM_SCHEMASP = 16 # The W3C XML Schemas parser module
|
||||
XML_FROM_SCHEMASV = 17 # The W3C XML Schemas validation module
|
||||
XML_FROM_RELAXNGP = 18 # The Relax-NG parser module
|
||||
XML_FROM_RELAXNGV = 19 # The Relax-NG validator module
|
||||
XML_FROM_CATALOG = 20 # The Catalog module
|
||||
XML_FROM_C14N = 21 # The Canonicalization module
|
||||
XML_FROM_XSLT = 22 # The XSLT engine from libxslt
|
||||
XML_FROM_VALID = 23 # The XML DTD validation with valid context
|
||||
XML_FROM_CHECK = 24 # The error checking module
|
||||
XML_FROM_WRITER = 25 # The xmlwriter module
|
||||
XML_FROM_MODULE = 26 # The dynamically loaded module module
|
||||
XML_FROM_I18N = 27 # The module handling character conversion
|
||||
XML_FROM_SCHEMATRONV = 28 # The Schematron validator module
|
||||
XML_FROM_BUFFER = 29 # The buffers module
|
||||
XML_FROM_URI = 30 # The URI module
|
||||
|
||||
ctypedef enum xmlParserErrors:
|
||||
XML_ERR_OK = 0
|
||||
XML_ERR_INTERNAL_ERROR = 1
|
||||
XML_ERR_NO_MEMORY = 2
|
||||
XML_ERR_DOCUMENT_START = 3
|
||||
XML_ERR_DOCUMENT_EMPTY = 4
|
||||
XML_ERR_DOCUMENT_END = 5
|
||||
XML_ERR_INVALID_HEX_CHARREF = 6
|
||||
XML_ERR_INVALID_DEC_CHARREF = 7
|
||||
XML_ERR_INVALID_CHARREF = 8
|
||||
XML_ERR_INVALID_CHAR = 9
|
||||
XML_ERR_CHARREF_AT_EOF = 10
|
||||
XML_ERR_CHARREF_IN_PROLOG = 11
|
||||
XML_ERR_CHARREF_IN_EPILOG = 12
|
||||
XML_ERR_CHARREF_IN_DTD = 13
|
||||
XML_ERR_ENTITYREF_AT_EOF = 14
|
||||
XML_ERR_ENTITYREF_IN_PROLOG = 15
|
||||
XML_ERR_ENTITYREF_IN_EPILOG = 16
|
||||
XML_ERR_ENTITYREF_IN_DTD = 17
|
||||
XML_ERR_PEREF_AT_EOF = 18
|
||||
XML_ERR_PEREF_IN_PROLOG = 19
|
||||
XML_ERR_PEREF_IN_EPILOG = 20
|
||||
XML_ERR_PEREF_IN_INT_SUBSET = 21
|
||||
XML_ERR_ENTITYREF_NO_NAME = 22
|
||||
XML_ERR_ENTITYREF_SEMICOL_MISSING = 23
|
||||
XML_ERR_PEREF_NO_NAME = 24
|
||||
XML_ERR_PEREF_SEMICOL_MISSING = 25
|
||||
XML_ERR_UNDECLARED_ENTITY = 26
|
||||
XML_WAR_UNDECLARED_ENTITY = 27
|
||||
XML_ERR_UNPARSED_ENTITY = 28
|
||||
XML_ERR_ENTITY_IS_EXTERNAL = 29
|
||||
XML_ERR_ENTITY_IS_PARAMETER = 30
|
||||
XML_ERR_UNKNOWN_ENCODING = 31
|
||||
XML_ERR_UNSUPPORTED_ENCODING = 32
|
||||
XML_ERR_STRING_NOT_STARTED = 33
|
||||
XML_ERR_STRING_NOT_CLOSED = 34
|
||||
XML_ERR_NS_DECL_ERROR = 35
|
||||
XML_ERR_ENTITY_NOT_STARTED = 36
|
||||
XML_ERR_ENTITY_NOT_FINISHED = 37
|
||||
XML_ERR_LT_IN_ATTRIBUTE = 38
|
||||
XML_ERR_ATTRIBUTE_NOT_STARTED = 39
|
||||
XML_ERR_ATTRIBUTE_NOT_FINISHED = 40
|
||||
XML_ERR_ATTRIBUTE_WITHOUT_VALUE = 41
|
||||
XML_ERR_ATTRIBUTE_REDEFINED = 42
|
||||
XML_ERR_LITERAL_NOT_STARTED = 43
|
||||
XML_ERR_LITERAL_NOT_FINISHED = 44
|
||||
XML_ERR_COMMENT_NOT_FINISHED = 45
|
||||
XML_ERR_PI_NOT_STARTED = 46
|
||||
XML_ERR_PI_NOT_FINISHED = 47
|
||||
XML_ERR_NOTATION_NOT_STARTED = 48
|
||||
XML_ERR_NOTATION_NOT_FINISHED = 49
|
||||
XML_ERR_ATTLIST_NOT_STARTED = 50
|
||||
XML_ERR_ATTLIST_NOT_FINISHED = 51
|
||||
XML_ERR_MIXED_NOT_STARTED = 52
|
||||
XML_ERR_MIXED_NOT_FINISHED = 53
|
||||
XML_ERR_ELEMCONTENT_NOT_STARTED = 54
|
||||
XML_ERR_ELEMCONTENT_NOT_FINISHED = 55
|
||||
XML_ERR_XMLDECL_NOT_STARTED = 56
|
||||
XML_ERR_XMLDECL_NOT_FINISHED = 57
|
||||
XML_ERR_CONDSEC_NOT_STARTED = 58
|
||||
XML_ERR_CONDSEC_NOT_FINISHED = 59
|
||||
XML_ERR_EXT_SUBSET_NOT_FINISHED = 60
|
||||
XML_ERR_DOCTYPE_NOT_FINISHED = 61
|
||||
XML_ERR_MISPLACED_CDATA_END = 62
|
||||
XML_ERR_CDATA_NOT_FINISHED = 63
|
||||
XML_ERR_RESERVED_XML_NAME = 64
|
||||
XML_ERR_SPACE_REQUIRED = 65
|
||||
XML_ERR_SEPARATOR_REQUIRED = 66
|
||||
XML_ERR_NMTOKEN_REQUIRED = 67
|
||||
XML_ERR_NAME_REQUIRED = 68
|
||||
XML_ERR_PCDATA_REQUIRED = 69
|
||||
XML_ERR_URI_REQUIRED = 70
|
||||
XML_ERR_PUBID_REQUIRED = 71
|
||||
XML_ERR_LT_REQUIRED = 72
|
||||
XML_ERR_GT_REQUIRED = 73
|
||||
XML_ERR_LTSLASH_REQUIRED = 74
|
||||
XML_ERR_EQUAL_REQUIRED = 75
|
||||
XML_ERR_TAG_NAME_MISMATCH = 76
|
||||
XML_ERR_TAG_NOT_FINISHED = 77
|
||||
XML_ERR_STANDALONE_VALUE = 78
|
||||
XML_ERR_ENCODING_NAME = 79
|
||||
XML_ERR_HYPHEN_IN_COMMENT = 80
|
||||
XML_ERR_INVALID_ENCODING = 81
|
||||
XML_ERR_EXT_ENTITY_STANDALONE = 82
|
||||
XML_ERR_CONDSEC_INVALID = 83
|
||||
XML_ERR_VALUE_REQUIRED = 84
|
||||
XML_ERR_NOT_WELL_BALANCED = 85
|
||||
XML_ERR_EXTRA_CONTENT = 86
|
||||
XML_ERR_ENTITY_CHAR_ERROR = 87
|
||||
XML_ERR_ENTITY_PE_INTERNAL = 88
|
||||
XML_ERR_ENTITY_LOOP = 89
|
||||
XML_ERR_ENTITY_BOUNDARY = 90
|
||||
XML_ERR_INVALID_URI = 91
|
||||
XML_ERR_URI_FRAGMENT = 92
|
||||
XML_WAR_CATALOG_PI = 93
|
||||
XML_ERR_NO_DTD = 94
|
||||
XML_ERR_CONDSEC_INVALID_KEYWORD = 95
|
||||
XML_ERR_VERSION_MISSING = 96
|
||||
XML_WAR_UNKNOWN_VERSION = 97
|
||||
XML_WAR_LANG_VALUE = 98
|
||||
XML_WAR_NS_URI = 99
|
||||
XML_WAR_NS_URI_RELATIVE = 100
|
||||
XML_ERR_MISSING_ENCODING = 101
|
||||
XML_WAR_SPACE_VALUE = 102
|
||||
XML_ERR_NOT_STANDALONE = 103
|
||||
XML_ERR_ENTITY_PROCESSING = 104
|
||||
XML_ERR_NOTATION_PROCESSING = 105
|
||||
XML_WAR_NS_COLUMN = 106
|
||||
XML_WAR_ENTITY_REDEFINED = 107
|
||||
XML_ERR_UNKNOWN_VERSION = 108
|
||||
XML_ERR_VERSION_MISMATCH = 109
|
||||
XML_ERR_NAME_TOO_LONG = 110
|
||||
XML_ERR_USER_STOP = 111
|
||||
XML_NS_ERR_XML_NAMESPACE = 200
|
||||
XML_NS_ERR_UNDEFINED_NAMESPACE = 201
|
||||
XML_NS_ERR_QNAME = 202
|
||||
XML_NS_ERR_ATTRIBUTE_REDEFINED = 203
|
||||
XML_NS_ERR_EMPTY = 204
|
||||
XML_NS_ERR_COLON = 205
|
||||
XML_DTD_ATTRIBUTE_DEFAULT = 500
|
||||
XML_DTD_ATTRIBUTE_REDEFINED = 501
|
||||
XML_DTD_ATTRIBUTE_VALUE = 502
|
||||
XML_DTD_CONTENT_ERROR = 503
|
||||
XML_DTD_CONTENT_MODEL = 504
|
||||
XML_DTD_CONTENT_NOT_DETERMINIST = 505
|
||||
XML_DTD_DIFFERENT_PREFIX = 506
|
||||
XML_DTD_ELEM_DEFAULT_NAMESPACE = 507
|
||||
XML_DTD_ELEM_NAMESPACE = 508
|
||||
XML_DTD_ELEM_REDEFINED = 509
|
||||
XML_DTD_EMPTY_NOTATION = 510
|
||||
XML_DTD_ENTITY_TYPE = 511
|
||||
XML_DTD_ID_FIXED = 512
|
||||
XML_DTD_ID_REDEFINED = 513
|
||||
XML_DTD_ID_SUBSET = 514
|
||||
XML_DTD_INVALID_CHILD = 515
|
||||
XML_DTD_INVALID_DEFAULT = 516
|
||||
XML_DTD_LOAD_ERROR = 517
|
||||
XML_DTD_MISSING_ATTRIBUTE = 518
|
||||
XML_DTD_MIXED_CORRUPT = 519
|
||||
XML_DTD_MULTIPLE_ID = 520
|
||||
XML_DTD_NO_DOC = 521
|
||||
XML_DTD_NO_DTD = 522
|
||||
XML_DTD_NO_ELEM_NAME = 523
|
||||
XML_DTD_NO_PREFIX = 524
|
||||
XML_DTD_NO_ROOT = 525
|
||||
XML_DTD_NOTATION_REDEFINED = 526
|
||||
XML_DTD_NOTATION_VALUE = 527
|
||||
XML_DTD_NOT_EMPTY = 528
|
||||
XML_DTD_NOT_PCDATA = 529
|
||||
XML_DTD_NOT_STANDALONE = 530
|
||||
XML_DTD_ROOT_NAME = 531
|
||||
XML_DTD_STANDALONE_WHITE_SPACE = 532
|
||||
XML_DTD_UNKNOWN_ATTRIBUTE = 533
|
||||
XML_DTD_UNKNOWN_ELEM = 534
|
||||
XML_DTD_UNKNOWN_ENTITY = 535
|
||||
XML_DTD_UNKNOWN_ID = 536
|
||||
XML_DTD_UNKNOWN_NOTATION = 537
|
||||
XML_DTD_STANDALONE_DEFAULTED = 538
|
||||
XML_DTD_XMLID_VALUE = 539
|
||||
XML_DTD_XMLID_TYPE = 540
|
||||
XML_DTD_DUP_TOKEN = 541
|
||||
XML_HTML_STRUCURE_ERROR = 800
|
||||
XML_HTML_UNKNOWN_TAG = 801
|
||||
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000
|
||||
XML_RNGP_ATTR_CONFLICT = 1001
|
||||
XML_RNGP_ATTRIBUTE_CHILDREN = 1002
|
||||
XML_RNGP_ATTRIBUTE_CONTENT = 1003
|
||||
XML_RNGP_ATTRIBUTE_EMPTY = 1004
|
||||
XML_RNGP_ATTRIBUTE_NOOP = 1005
|
||||
XML_RNGP_CHOICE_CONTENT = 1006
|
||||
XML_RNGP_CHOICE_EMPTY = 1007
|
||||
XML_RNGP_CREATE_FAILURE = 1008
|
||||
XML_RNGP_DATA_CONTENT = 1009
|
||||
XML_RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010
|
||||
XML_RNGP_DEFINE_CREATE_FAILED = 1011
|
||||
XML_RNGP_DEFINE_EMPTY = 1012
|
||||
XML_RNGP_DEFINE_MISSING = 1013
|
||||
XML_RNGP_DEFINE_NAME_MISSING = 1014
|
||||
XML_RNGP_ELEM_CONTENT_EMPTY = 1015
|
||||
XML_RNGP_ELEM_CONTENT_ERROR = 1016
|
||||
XML_RNGP_ELEMENT_EMPTY = 1017
|
||||
XML_RNGP_ELEMENT_CONTENT = 1018
|
||||
XML_RNGP_ELEMENT_NAME = 1019
|
||||
XML_RNGP_ELEMENT_NO_CONTENT = 1020
|
||||
XML_RNGP_ELEM_TEXT_CONFLICT = 1021
|
||||
XML_RNGP_EMPTY = 1022
|
||||
XML_RNGP_EMPTY_CONSTRUCT = 1023
|
||||
XML_RNGP_EMPTY_CONTENT = 1024
|
||||
XML_RNGP_EMPTY_NOT_EMPTY = 1025
|
||||
XML_RNGP_ERROR_TYPE_LIB = 1026
|
||||
XML_RNGP_EXCEPT_EMPTY = 1027
|
||||
XML_RNGP_EXCEPT_MISSING = 1028
|
||||
XML_RNGP_EXCEPT_MULTIPLE = 1029
|
||||
XML_RNGP_EXCEPT_NO_CONTENT = 1030
|
||||
XML_RNGP_EXTERNALREF_EMTPY = 1031
|
||||
XML_RNGP_EXTERNAL_REF_FAILURE = 1032
|
||||
XML_RNGP_EXTERNALREF_RECURSE = 1033
|
||||
XML_RNGP_FORBIDDEN_ATTRIBUTE = 1034
|
||||
XML_RNGP_FOREIGN_ELEMENT = 1035
|
||||
XML_RNGP_GRAMMAR_CONTENT = 1036
|
||||
XML_RNGP_GRAMMAR_EMPTY = 1037
|
||||
XML_RNGP_GRAMMAR_MISSING = 1038
|
||||
XML_RNGP_GRAMMAR_NO_START = 1039
|
||||
XML_RNGP_GROUP_ATTR_CONFLICT = 1040
|
||||
XML_RNGP_HREF_ERROR = 1041
|
||||
XML_RNGP_INCLUDE_EMPTY = 1042
|
||||
XML_RNGP_INCLUDE_FAILURE = 1043
|
||||
XML_RNGP_INCLUDE_RECURSE = 1044
|
||||
XML_RNGP_INTERLEAVE_ADD = 1045
|
||||
XML_RNGP_INTERLEAVE_CREATE_FAILED = 1046
|
||||
XML_RNGP_INTERLEAVE_EMPTY = 1047
|
||||
XML_RNGP_INTERLEAVE_NO_CONTENT = 1048
|
||||
XML_RNGP_INVALID_DEFINE_NAME = 1049
|
||||
XML_RNGP_INVALID_URI = 1050
|
||||
XML_RNGP_INVALID_VALUE = 1051
|
||||
XML_RNGP_MISSING_HREF = 1052
|
||||
XML_RNGP_NAME_MISSING = 1053
|
||||
XML_RNGP_NEED_COMBINE = 1054
|
||||
XML_RNGP_NOTALLOWED_NOT_EMPTY = 1055
|
||||
XML_RNGP_NSNAME_ATTR_ANCESTOR = 1056
|
||||
XML_RNGP_NSNAME_NO_NS = 1057
|
||||
XML_RNGP_PARAM_FORBIDDEN = 1058
|
||||
XML_RNGP_PARAM_NAME_MISSING = 1059
|
||||
XML_RNGP_PARENTREF_CREATE_FAILED = 1060
|
||||
XML_RNGP_PARENTREF_NAME_INVALID = 1061
|
||||
XML_RNGP_PARENTREF_NO_NAME = 1062
|
||||
XML_RNGP_PARENTREF_NO_PARENT = 1063
|
||||
XML_RNGP_PARENTREF_NOT_EMPTY = 1064
|
||||
XML_RNGP_PARSE_ERROR = 1065
|
||||
XML_RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066
|
||||
XML_RNGP_PAT_ATTR_ATTR = 1067
|
||||
XML_RNGP_PAT_ATTR_ELEM = 1068
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ATTR = 1069
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ELEM = 1070
|
||||
XML_RNGP_PAT_DATA_EXCEPT_EMPTY = 1071
|
||||
XML_RNGP_PAT_DATA_EXCEPT_GROUP = 1072
|
||||
XML_RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073
|
||||
XML_RNGP_PAT_DATA_EXCEPT_LIST = 1074
|
||||
XML_RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075
|
||||
XML_RNGP_PAT_DATA_EXCEPT_REF = 1076
|
||||
XML_RNGP_PAT_DATA_EXCEPT_TEXT = 1077
|
||||
XML_RNGP_PAT_LIST_ATTR = 1078
|
||||
XML_RNGP_PAT_LIST_ELEM = 1079
|
||||
XML_RNGP_PAT_LIST_INTERLEAVE = 1080
|
||||
XML_RNGP_PAT_LIST_LIST = 1081
|
||||
XML_RNGP_PAT_LIST_REF = 1082
|
||||
XML_RNGP_PAT_LIST_TEXT = 1083
|
||||
XML_RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084
|
||||
XML_RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085
|
||||
XML_RNGP_PAT_ONEMORE_GROUP_ATTR = 1086
|
||||
XML_RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087
|
||||
XML_RNGP_PAT_START_ATTR = 1088
|
||||
XML_RNGP_PAT_START_DATA = 1089
|
||||
XML_RNGP_PAT_START_EMPTY = 1090
|
||||
XML_RNGP_PAT_START_GROUP = 1091
|
||||
XML_RNGP_PAT_START_INTERLEAVE = 1092
|
||||
XML_RNGP_PAT_START_LIST = 1093
|
||||
XML_RNGP_PAT_START_ONEMORE = 1094
|
||||
XML_RNGP_PAT_START_TEXT = 1095
|
||||
XML_RNGP_PAT_START_VALUE = 1096
|
||||
XML_RNGP_PREFIX_UNDEFINED = 1097
|
||||
XML_RNGP_REF_CREATE_FAILED = 1098
|
||||
XML_RNGP_REF_CYCLE = 1099
|
||||
XML_RNGP_REF_NAME_INVALID = 1100
|
||||
XML_RNGP_REF_NO_DEF = 1101
|
||||
XML_RNGP_REF_NO_NAME = 1102
|
||||
XML_RNGP_REF_NOT_EMPTY = 1103
|
||||
XML_RNGP_START_CHOICE_AND_INTERLEAVE = 1104
|
||||
XML_RNGP_START_CONTENT = 1105
|
||||
XML_RNGP_START_EMPTY = 1106
|
||||
XML_RNGP_START_MISSING = 1107
|
||||
XML_RNGP_TEXT_EXPECTED = 1108
|
||||
XML_RNGP_TEXT_HAS_CHILD = 1109
|
||||
XML_RNGP_TYPE_MISSING = 1110
|
||||
XML_RNGP_TYPE_NOT_FOUND = 1111
|
||||
XML_RNGP_TYPE_VALUE = 1112
|
||||
XML_RNGP_UNKNOWN_ATTRIBUTE = 1113
|
||||
XML_RNGP_UNKNOWN_COMBINE = 1114
|
||||
XML_RNGP_UNKNOWN_CONSTRUCT = 1115
|
||||
XML_RNGP_UNKNOWN_TYPE_LIB = 1116
|
||||
XML_RNGP_URI_FRAGMENT = 1117
|
||||
XML_RNGP_URI_NOT_ABSOLUTE = 1118
|
||||
XML_RNGP_VALUE_EMPTY = 1119
|
||||
XML_RNGP_VALUE_NO_CONTENT = 1120
|
||||
XML_RNGP_XMLNS_NAME = 1121
|
||||
XML_RNGP_XML_NS = 1122
|
||||
XML_XPATH_EXPRESSION_OK = 1200
|
||||
XML_XPATH_NUMBER_ERROR = 1201
|
||||
XML_XPATH_UNFINISHED_LITERAL_ERROR = 1202
|
||||
XML_XPATH_START_LITERAL_ERROR = 1203
|
||||
XML_XPATH_VARIABLE_REF_ERROR = 1204
|
||||
XML_XPATH_UNDEF_VARIABLE_ERROR = 1205
|
||||
XML_XPATH_INVALID_PREDICATE_ERROR = 1206
|
||||
XML_XPATH_EXPR_ERROR = 1207
|
||||
XML_XPATH_UNCLOSED_ERROR = 1208
|
||||
XML_XPATH_UNKNOWN_FUNC_ERROR = 1209
|
||||
XML_XPATH_INVALID_OPERAND = 1210
|
||||
XML_XPATH_INVALID_TYPE = 1211
|
||||
XML_XPATH_INVALID_ARITY = 1212
|
||||
XML_XPATH_INVALID_CTXT_SIZE = 1213
|
||||
XML_XPATH_INVALID_CTXT_POSITION = 1214
|
||||
XML_XPATH_MEMORY_ERROR = 1215
|
||||
XML_XPTR_SYNTAX_ERROR = 1216
|
||||
XML_XPTR_RESOURCE_ERROR = 1217
|
||||
XML_XPTR_SUB_RESOURCE_ERROR = 1218
|
||||
XML_XPATH_UNDEF_PREFIX_ERROR = 1219
|
||||
XML_XPATH_ENCODING_ERROR = 1220
|
||||
XML_XPATH_INVALID_CHAR_ERROR = 1221
|
||||
XML_TREE_INVALID_HEX = 1300
|
||||
XML_TREE_INVALID_DEC = 1301
|
||||
XML_TREE_UNTERMINATED_ENTITY = 1302
|
||||
XML_TREE_NOT_UTF8 = 1303
|
||||
XML_SAVE_NOT_UTF8 = 1400
|
||||
XML_SAVE_CHAR_INVALID = 1401
|
||||
XML_SAVE_NO_DOCTYPE = 1402
|
||||
XML_SAVE_UNKNOWN_ENCODING = 1403
|
||||
XML_REGEXP_COMPILE_ERROR = 1450
|
||||
XML_IO_UNKNOWN = 1500
|
||||
XML_IO_EACCES = 1501
|
||||
XML_IO_EAGAIN = 1502
|
||||
XML_IO_EBADF = 1503
|
||||
XML_IO_EBADMSG = 1504
|
||||
XML_IO_EBUSY = 1505
|
||||
XML_IO_ECANCELED = 1506
|
||||
XML_IO_ECHILD = 1507
|
||||
XML_IO_EDEADLK = 1508
|
||||
XML_IO_EDOM = 1509
|
||||
XML_IO_EEXIST = 1510
|
||||
XML_IO_EFAULT = 1511
|
||||
XML_IO_EFBIG = 1512
|
||||
XML_IO_EINPROGRESS = 1513
|
||||
XML_IO_EINTR = 1514
|
||||
XML_IO_EINVAL = 1515
|
||||
XML_IO_EIO = 1516
|
||||
XML_IO_EISDIR = 1517
|
||||
XML_IO_EMFILE = 1518
|
||||
XML_IO_EMLINK = 1519
|
||||
XML_IO_EMSGSIZE = 1520
|
||||
XML_IO_ENAMETOOLONG = 1521
|
||||
XML_IO_ENFILE = 1522
|
||||
XML_IO_ENODEV = 1523
|
||||
XML_IO_ENOENT = 1524
|
||||
XML_IO_ENOEXEC = 1525
|
||||
XML_IO_ENOLCK = 1526
|
||||
XML_IO_ENOMEM = 1527
|
||||
XML_IO_ENOSPC = 1528
|
||||
XML_IO_ENOSYS = 1529
|
||||
XML_IO_ENOTDIR = 1530
|
||||
XML_IO_ENOTEMPTY = 1531
|
||||
XML_IO_ENOTSUP = 1532
|
||||
XML_IO_ENOTTY = 1533
|
||||
XML_IO_ENXIO = 1534
|
||||
XML_IO_EPERM = 1535
|
||||
XML_IO_EPIPE = 1536
|
||||
XML_IO_ERANGE = 1537
|
||||
XML_IO_EROFS = 1538
|
||||
XML_IO_ESPIPE = 1539
|
||||
XML_IO_ESRCH = 1540
|
||||
XML_IO_ETIMEDOUT = 1541
|
||||
XML_IO_EXDEV = 1542
|
||||
XML_IO_NETWORK_ATTEMPT = 1543
|
||||
XML_IO_ENCODER = 1544
|
||||
XML_IO_FLUSH = 1545
|
||||
XML_IO_WRITE = 1546
|
||||
XML_IO_NO_INPUT = 1547
|
||||
XML_IO_BUFFER_FULL = 1548
|
||||
XML_IO_LOAD_ERROR = 1549
|
||||
XML_IO_ENOTSOCK = 1550
|
||||
XML_IO_EISCONN = 1551
|
||||
XML_IO_ECONNREFUSED = 1552
|
||||
XML_IO_ENETUNREACH = 1553
|
||||
XML_IO_EADDRINUSE = 1554
|
||||
XML_IO_EALREADY = 1555
|
||||
XML_IO_EAFNOSUPPORT = 1556
|
||||
XML_XINCLUDE_RECURSION = 1600
|
||||
XML_XINCLUDE_PARSE_VALUE = 1601
|
||||
XML_XINCLUDE_ENTITY_DEF_MISMATCH = 1602
|
||||
XML_XINCLUDE_NO_HREF = 1603
|
||||
XML_XINCLUDE_NO_FALLBACK = 1604
|
||||
XML_XINCLUDE_HREF_URI = 1605
|
||||
XML_XINCLUDE_TEXT_FRAGMENT = 1606
|
||||
XML_XINCLUDE_TEXT_DOCUMENT = 1607
|
||||
XML_XINCLUDE_INVALID_CHAR = 1608
|
||||
XML_XINCLUDE_BUILD_FAILED = 1609
|
||||
XML_XINCLUDE_UNKNOWN_ENCODING = 1610
|
||||
XML_XINCLUDE_MULTIPLE_ROOT = 1611
|
||||
XML_XINCLUDE_XPTR_FAILED = 1612
|
||||
XML_XINCLUDE_XPTR_RESULT = 1613
|
||||
XML_XINCLUDE_INCLUDE_IN_INCLUDE = 1614
|
||||
XML_XINCLUDE_FALLBACKS_IN_INCLUDE = 1615
|
||||
XML_XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616
|
||||
XML_XINCLUDE_DEPRECATED_NS = 1617
|
||||
XML_XINCLUDE_FRAGMENT_ID = 1618
|
||||
XML_CATALOG_MISSING_ATTR = 1650
|
||||
XML_CATALOG_ENTRY_BROKEN = 1651
|
||||
XML_CATALOG_PREFER_VALUE = 1652
|
||||
XML_CATALOG_NOT_CATALOG = 1653
|
||||
XML_CATALOG_RECURSION = 1654
|
||||
XML_SCHEMAP_PREFIX_UNDEFINED = 1700
|
||||
XML_SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701
|
||||
XML_SCHEMAP_ATTRGRP_NONAME_NOREF = 1702
|
||||
XML_SCHEMAP_ATTR_NONAME_NOREF = 1703
|
||||
XML_SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704
|
||||
XML_SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705
|
||||
XML_SCHEMAP_ELEM_NONAME_NOREF = 1706
|
||||
XML_SCHEMAP_EXTENSION_NO_BASE = 1707
|
||||
XML_SCHEMAP_FACET_NO_VALUE = 1708
|
||||
XML_SCHEMAP_FAILED_BUILD_IMPORT = 1709
|
||||
XML_SCHEMAP_GROUP_NONAME_NOREF = 1710
|
||||
XML_SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711
|
||||
XML_SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712
|
||||
XML_SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713
|
||||
XML_SCHEMAP_INVALID_BOOLEAN = 1714
|
||||
XML_SCHEMAP_INVALID_ENUM = 1715
|
||||
XML_SCHEMAP_INVALID_FACET = 1716
|
||||
XML_SCHEMAP_INVALID_FACET_VALUE = 1717
|
||||
XML_SCHEMAP_INVALID_MAXOCCURS = 1718
|
||||
XML_SCHEMAP_INVALID_MINOCCURS = 1719
|
||||
XML_SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720
|
||||
XML_SCHEMAP_INVALID_WHITE_SPACE = 1721
|
||||
XML_SCHEMAP_NOATTR_NOREF = 1722
|
||||
XML_SCHEMAP_NOTATION_NO_NAME = 1723
|
||||
XML_SCHEMAP_NOTYPE_NOREF = 1724
|
||||
XML_SCHEMAP_REF_AND_SUBTYPE = 1725
|
||||
XML_SCHEMAP_RESTRICTION_NONAME_NOREF = 1726
|
||||
XML_SCHEMAP_SIMPLETYPE_NONAME = 1727
|
||||
XML_SCHEMAP_TYPE_AND_SUBTYPE = 1728
|
||||
XML_SCHEMAP_UNKNOWN_ALL_CHILD = 1729
|
||||
XML_SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730
|
||||
XML_SCHEMAP_UNKNOWN_ATTR_CHILD = 1731
|
||||
XML_SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732
|
||||
XML_SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733
|
||||
XML_SCHEMAP_UNKNOWN_BASE_TYPE = 1734
|
||||
XML_SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735
|
||||
XML_SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736
|
||||
XML_SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737
|
||||
XML_SCHEMAP_UNKNOWN_ELEM_CHILD = 1738
|
||||
XML_SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739
|
||||
XML_SCHEMAP_UNKNOWN_FACET_CHILD = 1740
|
||||
XML_SCHEMAP_UNKNOWN_FACET_TYPE = 1741
|
||||
XML_SCHEMAP_UNKNOWN_GROUP_CHILD = 1742
|
||||
XML_SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743
|
||||
XML_SCHEMAP_UNKNOWN_LIST_CHILD = 1744
|
||||
XML_SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745
|
||||
XML_SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746
|
||||
XML_SCHEMAP_UNKNOWN_REF = 1747
|
||||
XML_SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748
|
||||
XML_SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749
|
||||
XML_SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750
|
||||
XML_SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751
|
||||
XML_SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752
|
||||
XML_SCHEMAP_UNKNOWN_TYPE = 1753
|
||||
XML_SCHEMAP_UNKNOWN_UNION_CHILD = 1754
|
||||
XML_SCHEMAP_ELEM_DEFAULT_FIXED = 1755
|
||||
XML_SCHEMAP_REGEXP_INVALID = 1756
|
||||
XML_SCHEMAP_FAILED_LOAD = 1757
|
||||
XML_SCHEMAP_NOTHING_TO_PARSE = 1758
|
||||
XML_SCHEMAP_NOROOT = 1759
|
||||
XML_SCHEMAP_REDEFINED_GROUP = 1760
|
||||
XML_SCHEMAP_REDEFINED_TYPE = 1761
|
||||
XML_SCHEMAP_REDEFINED_ELEMENT = 1762
|
||||
XML_SCHEMAP_REDEFINED_ATTRGROUP = 1763
|
||||
XML_SCHEMAP_REDEFINED_ATTR = 1764
|
||||
XML_SCHEMAP_REDEFINED_NOTATION = 1765
|
||||
XML_SCHEMAP_FAILED_PARSE = 1766
|
||||
XML_SCHEMAP_UNKNOWN_PREFIX = 1767
|
||||
XML_SCHEMAP_DEF_AND_PREFIX = 1768
|
||||
XML_SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769
|
||||
XML_SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770
|
||||
XML_SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771
|
||||
XML_SCHEMAP_NOT_SCHEMA = 1772
|
||||
XML_SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773
|
||||
XML_SCHEMAP_INVALID_ATTR_USE = 1774
|
||||
XML_SCHEMAP_RECURSIVE = 1775
|
||||
XML_SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776
|
||||
XML_SCHEMAP_INVALID_ATTR_COMBINATION = 1777
|
||||
XML_SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778
|
||||
XML_SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779
|
||||
XML_SCHEMAP_INVALID_ATTR_NAME = 1780
|
||||
XML_SCHEMAP_REF_AND_CONTENT = 1781
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_1 = 1782
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_2 = 1783
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_3 = 1784
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_4 = 1785
|
||||
XML_SCHEMAP_CT_PROPS_CORRECT_5 = 1786
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791
|
||||
XML_SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792
|
||||
XML_SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793
|
||||
XML_SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794
|
||||
XML_SCHEMAP_SRC_IMPORT_3_1 = 1795
|
||||
XML_SCHEMAP_SRC_IMPORT_3_2 = 1796
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_3 = 1800
|
||||
XML_SCHEMAV_NOROOT = 1801
|
||||
XML_SCHEMAV_UNDECLAREDELEM = 1802
|
||||
XML_SCHEMAV_NOTTOPLEVEL = 1803
|
||||
XML_SCHEMAV_MISSING = 1804
|
||||
XML_SCHEMAV_WRONGELEM = 1805
|
||||
XML_SCHEMAV_NOTYPE = 1806
|
||||
XML_SCHEMAV_NOROLLBACK = 1807
|
||||
XML_SCHEMAV_ISABSTRACT = 1808
|
||||
XML_SCHEMAV_NOTEMPTY = 1809
|
||||
XML_SCHEMAV_ELEMCONT = 1810
|
||||
XML_SCHEMAV_HAVEDEFAULT = 1811
|
||||
XML_SCHEMAV_NOTNILLABLE = 1812
|
||||
XML_SCHEMAV_EXTRACONTENT = 1813
|
||||
XML_SCHEMAV_INVALIDATTR = 1814
|
||||
XML_SCHEMAV_INVALIDELEM = 1815
|
||||
XML_SCHEMAV_NOTDETERMINIST = 1816
|
||||
XML_SCHEMAV_CONSTRUCT = 1817
|
||||
XML_SCHEMAV_INTERNAL = 1818
|
||||
XML_SCHEMAV_NOTSIMPLE = 1819
|
||||
XML_SCHEMAV_ATTRUNKNOWN = 1820
|
||||
XML_SCHEMAV_ATTRINVALID = 1821
|
||||
XML_SCHEMAV_VALUE = 1822
|
||||
XML_SCHEMAV_FACET = 1823
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825
|
||||
XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826
|
||||
XML_SCHEMAV_CVC_TYPE_3_1_1 = 1827
|
||||
XML_SCHEMAV_CVC_TYPE_3_1_2 = 1828
|
||||
XML_SCHEMAV_CVC_FACET_VALID = 1829
|
||||
XML_SCHEMAV_CVC_LENGTH_VALID = 1830
|
||||
XML_SCHEMAV_CVC_MINLENGTH_VALID = 1831
|
||||
XML_SCHEMAV_CVC_MAXLENGTH_VALID = 1832
|
||||
XML_SCHEMAV_CVC_MININCLUSIVE_VALID = 1833
|
||||
XML_SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834
|
||||
XML_SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835
|
||||
XML_SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836
|
||||
XML_SCHEMAV_CVC_TOTALDIGITS_VALID = 1837
|
||||
XML_SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838
|
||||
XML_SCHEMAV_CVC_PATTERN_VALID = 1839
|
||||
XML_SCHEMAV_CVC_ENUMERATION_VALID = 1840
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844
|
||||
XML_SCHEMAV_CVC_ELT_1 = 1845
|
||||
XML_SCHEMAV_CVC_ELT_2 = 1846
|
||||
XML_SCHEMAV_CVC_ELT_3_1 = 1847
|
||||
XML_SCHEMAV_CVC_ELT_3_2_1 = 1848
|
||||
XML_SCHEMAV_CVC_ELT_3_2_2 = 1849
|
||||
XML_SCHEMAV_CVC_ELT_4_1 = 1850
|
||||
XML_SCHEMAV_CVC_ELT_4_2 = 1851
|
||||
XML_SCHEMAV_CVC_ELT_4_3 = 1852
|
||||
XML_SCHEMAV_CVC_ELT_5_1_1 = 1853
|
||||
XML_SCHEMAV_CVC_ELT_5_1_2 = 1854
|
||||
XML_SCHEMAV_CVC_ELT_5_2_1 = 1855
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_1 = 1856
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857
|
||||
XML_SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858
|
||||
XML_SCHEMAV_CVC_ELT_6 = 1859
|
||||
XML_SCHEMAV_CVC_ELT_7 = 1860
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_1 = 1861
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_2 = 1862
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_3 = 1863
|
||||
XML_SCHEMAV_CVC_ATTRIBUTE_4 = 1864
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870
|
||||
XML_SCHEMAV_ELEMENT_CONTENT = 1871
|
||||
XML_SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872
|
||||
XML_SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873
|
||||
XML_SCHEMAV_CVC_AU = 1874
|
||||
XML_SCHEMAV_CVC_TYPE_1 = 1875
|
||||
XML_SCHEMAV_CVC_TYPE_2 = 1876
|
||||
XML_SCHEMAV_CVC_IDC = 1877
|
||||
XML_SCHEMAV_CVC_WILDCARD = 1878
|
||||
XML_SCHEMAV_MISC = 1879
|
||||
XML_XPTR_UNKNOWN_SCHEME = 1900
|
||||
XML_XPTR_CHILDSEQ_START = 1901
|
||||
XML_XPTR_EVAL_FAILED = 1902
|
||||
XML_XPTR_EXTRA_OBJECTS = 1903
|
||||
XML_C14N_CREATE_CTXT = 1950
|
||||
XML_C14N_REQUIRES_UTF8 = 1951
|
||||
XML_C14N_CREATE_STACK = 1952
|
||||
XML_C14N_INVALID_NODE = 1953
|
||||
XML_C14N_UNKNOW_NODE = 1954
|
||||
XML_C14N_RELATIVE_NAMESPACE = 1955
|
||||
XML_FTP_PASV_ANSWER = 2000
|
||||
XML_FTP_EPSV_ANSWER = 2001
|
||||
XML_FTP_ACCNT = 2002
|
||||
XML_FTP_URL_SYNTAX = 2003
|
||||
XML_HTTP_URL_SYNTAX = 2020
|
||||
XML_HTTP_USE_IP = 2021
|
||||
XML_HTTP_UNKNOWN_HOST = 2022
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002
|
||||
XML_SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003
|
||||
XML_SCHEMAP_SRC_RESOLVE = 3004
|
||||
XML_SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005
|
||||
XML_SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006
|
||||
XML_SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_1 = 3008
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_2 = 3009
|
||||
XML_SCHEMAP_ST_PROPS_CORRECT_3 = 3010
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029
|
||||
XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030
|
||||
XML_SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031
|
||||
XML_SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032
|
||||
XML_SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033
|
||||
XML_SCHEMAP_S4S_ELEM_MISSING = 3034
|
||||
XML_SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035
|
||||
XML_SCHEMAP_S4S_ATTR_MISSING = 3036
|
||||
XML_SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037
|
||||
XML_SCHEMAP_SRC_ELEMENT_1 = 3038
|
||||
XML_SCHEMAP_SRC_ELEMENT_2_1 = 3039
|
||||
XML_SCHEMAP_SRC_ELEMENT_2_2 = 3040
|
||||
XML_SCHEMAP_SRC_ELEMENT_3 = 3041
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_1 = 3042
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_2_1 = 3043
|
||||
XML_SCHEMAP_P_PROPS_CORRECT_2_2 = 3044
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_2 = 3045
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_3 = 3046
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_4 = 3047
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_5 = 3048
|
||||
XML_SCHEMAP_E_PROPS_CORRECT_6 = 3049
|
||||
XML_SCHEMAP_SRC_INCLUDE = 3050
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_1 = 3051
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_2 = 3052
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_4 = 3055
|
||||
XML_SCHEMAP_NO_XMLNS = 3056
|
||||
XML_SCHEMAP_NO_XSI = 3057
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_1 = 3058
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060
|
||||
XML_SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061
|
||||
XML_SCHEMAP_CVC_SIMPLE_TYPE = 3062
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_1 = 3063
|
||||
XML_SCHEMAP_SRC_IMPORT_1_1 = 3064
|
||||
XML_SCHEMAP_SRC_IMPORT_1_2 = 3065
|
||||
XML_SCHEMAP_SRC_IMPORT_2 = 3066
|
||||
XML_SCHEMAP_SRC_IMPORT_2_1 = 3067
|
||||
XML_SCHEMAP_SRC_IMPORT_2_2 = 3068
|
||||
XML_SCHEMAP_INTERNAL = 3069 # 3069 non-W3C
|
||||
XML_SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072
|
||||
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073
|
||||
XML_SCHEMAP_MG_PROPS_CORRECT_1 = 3074
|
||||
XML_SCHEMAP_MG_PROPS_CORRECT_2 = 3075
|
||||
XML_SCHEMAP_SRC_CT_1 = 3076
|
||||
XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077
|
||||
XML_SCHEMAP_AU_PROPS_CORRECT_2 = 3078
|
||||
XML_SCHEMAP_A_PROPS_CORRECT_2 = 3079
|
||||
XML_SCHEMAP_C_PROPS_CORRECT = 3080
|
||||
XML_SCHEMAP_SRC_REDEFINE = 3081
|
||||
XML_SCHEMAP_SRC_IMPORT = 3082
|
||||
XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083
|
||||
XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084
|
||||
XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085
|
||||
XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 # 3085
|
||||
XML_SCHEMAP_AG_PROPS_CORRECT = 3087 # 3086
|
||||
XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 # 3087
|
||||
XML_SCHEMAP_AU_PROPS_CORRECT = 3089 # 3088
|
||||
XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 # 3089
|
||||
XML_SCHEMAP_COS_ALL_LIMITED = 3091 # 3090
|
||||
XML_SCHEMATRONV_ASSERT = 4000
|
||||
XML_SCHEMATRONV_REPORT = 4001
|
||||
XML_MODULE_OPEN = 4900
|
||||
XML_MODULE_CLOSE = 4901
|
||||
XML_CHECK_FOUND_ELEMENT = 5000
|
||||
XML_CHECK_FOUND_ATTRIBUTE = 5001
|
||||
XML_CHECK_FOUND_TEXT = 5002
|
||||
XML_CHECK_FOUND_CDATA = 5003
|
||||
XML_CHECK_FOUND_ENTITYREF = 5004
|
||||
XML_CHECK_FOUND_ENTITY = 5005
|
||||
XML_CHECK_FOUND_PI = 5006
|
||||
XML_CHECK_FOUND_COMMENT = 5007
|
||||
XML_CHECK_FOUND_DOCTYPE = 5008
|
||||
XML_CHECK_FOUND_FRAGMENT = 5009
|
||||
XML_CHECK_FOUND_NOTATION = 5010
|
||||
XML_CHECK_UNKNOWN_NODE = 5011
|
||||
XML_CHECK_ENTITY_TYPE = 5012
|
||||
XML_CHECK_NO_PARENT = 5013
|
||||
XML_CHECK_NO_DOC = 5014
|
||||
XML_CHECK_NO_NAME = 5015
|
||||
XML_CHECK_NO_ELEM = 5016
|
||||
XML_CHECK_WRONG_DOC = 5017
|
||||
XML_CHECK_NO_PREV = 5018
|
||||
XML_CHECK_WRONG_PREV = 5019
|
||||
XML_CHECK_NO_NEXT = 5020
|
||||
XML_CHECK_WRONG_NEXT = 5021
|
||||
XML_CHECK_NOT_DTD = 5022
|
||||
XML_CHECK_NOT_ATTR = 5023
|
||||
XML_CHECK_NOT_ATTR_DECL = 5024
|
||||
XML_CHECK_NOT_ELEM_DECL = 5025
|
||||
XML_CHECK_NOT_ENTITY_DECL = 5026
|
||||
XML_CHECK_NOT_NS_DECL = 5027
|
||||
XML_CHECK_NO_HREF = 5028
|
||||
XML_CHECK_WRONG_PARENT = 5029
|
||||
XML_CHECK_NS_SCOPE = 5030
|
||||
XML_CHECK_NS_ANCESTOR = 5031
|
||||
XML_CHECK_NOT_UTF8 = 5032
|
||||
XML_CHECK_NO_DICT = 5033
|
||||
XML_CHECK_NOT_NCNAME = 5034
|
||||
XML_CHECK_OUTSIDE_DICT = 5035
|
||||
XML_CHECK_WRONG_NAME = 5036
|
||||
XML_CHECK_NAME_NOT_NULL = 5037
|
||||
XML_I18N_NO_NAME = 6000
|
||||
XML_I18N_NO_HANDLER = 6001
|
||||
XML_I18N_EXCESS_HANDLER = 6002
|
||||
XML_I18N_CONV_FAILED = 6003
|
||||
XML_I18N_NO_OUTPUT = 6004
|
||||
XML_BUF_OVERFLOW = 7000
|
||||
|
||||
ctypedef enum xmlRelaxNGValidErr:
|
||||
XML_RELAXNG_OK = 0
|
||||
XML_RELAXNG_ERR_MEMORY = 1
|
||||
XML_RELAXNG_ERR_TYPE = 2
|
||||
XML_RELAXNG_ERR_TYPEVAL = 3
|
||||
XML_RELAXNG_ERR_DUPID = 4
|
||||
XML_RELAXNG_ERR_TYPECMP = 5
|
||||
XML_RELAXNG_ERR_NOSTATE = 6
|
||||
XML_RELAXNG_ERR_NODEFINE = 7
|
||||
XML_RELAXNG_ERR_LISTEXTRA = 8
|
||||
XML_RELAXNG_ERR_LISTEMPTY = 9
|
||||
XML_RELAXNG_ERR_INTERNODATA = 10
|
||||
XML_RELAXNG_ERR_INTERSEQ = 11
|
||||
XML_RELAXNG_ERR_INTEREXTRA = 12
|
||||
XML_RELAXNG_ERR_ELEMNAME = 13
|
||||
XML_RELAXNG_ERR_ATTRNAME = 14
|
||||
XML_RELAXNG_ERR_ELEMNONS = 15
|
||||
XML_RELAXNG_ERR_ATTRNONS = 16
|
||||
XML_RELAXNG_ERR_ELEMWRONGNS = 17
|
||||
XML_RELAXNG_ERR_ATTRWRONGNS = 18
|
||||
XML_RELAXNG_ERR_ELEMEXTRANS = 19
|
||||
XML_RELAXNG_ERR_ATTREXTRANS = 20
|
||||
XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
|
||||
XML_RELAXNG_ERR_NOELEM = 22
|
||||
XML_RELAXNG_ERR_NOTELEM = 23
|
||||
XML_RELAXNG_ERR_ATTRVALID = 24
|
||||
XML_RELAXNG_ERR_CONTENTVALID = 25
|
||||
XML_RELAXNG_ERR_EXTRACONTENT = 26
|
||||
XML_RELAXNG_ERR_INVALIDATTR = 27
|
||||
XML_RELAXNG_ERR_DATAELEM = 28
|
||||
XML_RELAXNG_ERR_VALELEM = 29
|
||||
XML_RELAXNG_ERR_LISTELEM = 30
|
||||
XML_RELAXNG_ERR_DATATYPE = 31
|
||||
XML_RELAXNG_ERR_VALUE = 32
|
||||
XML_RELAXNG_ERR_LIST = 33
|
||||
XML_RELAXNG_ERR_NOGRAMMAR = 34
|
||||
XML_RELAXNG_ERR_EXTRADATA = 35
|
||||
XML_RELAXNG_ERR_LACKDATA = 36
|
||||
XML_RELAXNG_ERR_INTERNAL = 37
|
||||
XML_RELAXNG_ERR_ELEMWRONG = 38
|
||||
XML_RELAXNG_ERR_TEXTWRONG = 39
|
||||
# --- END: GENERATED CONSTANTS ---
|
||||
|
||||
# Declarations taken from libxml/xmlerror.h: the xmlError record, the two
# error callback signatures, and the two functions that accept such callbacks.
cdef extern from "libxml/xmlerror.h":
|
||||
# Error record; carries numeric domain/code, a message, a level, a file
# name with line number, plus three extra strings and two extra ints.
ctypedef struct xmlError:
|
||||
int domain
|
||||
int code
|
||||
char* message
|
||||
xmlErrorLevel level
|
||||
char* file
|
||||
char* str1
|
||||
char* str2
|
||||
char* str3
|
||||
int line
|
||||
int int1
|
||||
int int2
|
||||
|
||||
# Variadic callback: a context pointer, a message string, then "...".
ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) nogil
|
||||
# Structured callback: receives a user data pointer and an xmlError*.
ctypedef void (*xmlStructuredErrorFunc)(void* userData,
|
||||
xmlError* error) nogil
|
||||
|
||||
# Both setters take a context pointer together with the callback.
cdef void xmlSetGenericErrorFunc(
|
||||
void* ctxt, xmlGenericErrorFunc func) nogil
|
||||
cdef void xmlSetStructuredErrorFunc(
|
||||
void* ctxt, xmlStructuredErrorFunc func) nogil
|
||||
|
||||
# Two variables declared from libxml/globals.h: a structured error callback
# and an untyped context pointer.
# NOTE(review): presumably the currently installed global handler and the
# context passed to it - confirm against the libxml2 documentation.
cdef extern from "libxml/globals.h":
|
||||
cdef xmlStructuredErrorFunc xmlStructuredError
|
||||
cdef void* xmlStructuredErrorContext
|
||||
248
lib/python3.5/site-packages/lxml/includes/xmlparser.pxd
Normal file
248
lib/python3.5/site-packages/lxml/includes/xmlparser.pxd
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
from libc.string cimport const_char
|
||||
|
||||
from lxml.includes.tree cimport (
|
||||
xmlDoc, xmlNode, xmlDict, xmlDtd, xmlChar, const_xmlChar)
|
||||
from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
|
||||
from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc
|
||||
|
||||
|
||||
# SAX callback signatures declared from libxml/parser.h.
# Every callback returns void and takes an opaque void* ctx as first argument.
cdef extern from "libxml/parser.h":
|
||||
# Namespace-aware element start: local name, prefix and URI, followed by
# counted arrays of namespaces and attributes.
ctypedef void (*startElementNsSAX2Func)(void* ctx,
|
||||
const_xmlChar* localname,
|
||||
const_xmlChar* prefix,
|
||||
const_xmlChar* URI,
|
||||
int nb_namespaces,
|
||||
const_xmlChar** namespaces,
|
||||
int nb_attributes,
|
||||
int nb_defaulted,
|
||||
const_xmlChar** attributes)
|
||||
|
||||
# Namespace-aware element end: same name triple, no attributes.
ctypedef void (*endElementNsSAX2Func)(void* ctx,
|
||||
const_xmlChar* localname,
|
||||
const_xmlChar* prefix,
|
||||
const_xmlChar* URI)
|
||||
|
||||
# Non-namespace variants: a single name (plus an attribute array on start).
ctypedef void (*startElementSAXFunc)(void* ctx, const_xmlChar* name, const_xmlChar** atts)
|
||||
|
||||
ctypedef void (*endElementSAXFunc)(void* ctx, const_xmlChar* name)
|
||||
|
||||
# Text callbacks receive a buffer pointer together with an explicit length.
ctypedef void (*charactersSAXFunc)(void* ctx, const_xmlChar* ch, int len)
|
||||
|
||||
ctypedef void (*cdataBlockSAXFunc)(void* ctx, const_xmlChar* value, int len)
|
||||
|
||||
ctypedef void (*commentSAXFunc)(void* ctx, const_xmlChar* value)
|
||||
|
||||
ctypedef void (*processingInstructionSAXFunc)(void* ctx,
|
||||
const_xmlChar* target,
|
||||
const_xmlChar* data)
|
||||
|
||||
ctypedef void (*internalSubsetSAXFunc)(void* ctx,
|
||||
const_xmlChar* name,
|
||||
const_xmlChar* externalID,
|
||||
const_xmlChar* systemID)
|
||||
|
||||
ctypedef void (*endDocumentSAXFunc)(void* ctx)
|
||||
|
||||
ctypedef void (*startDocumentSAXFunc)(void* ctx)
|
||||
|
||||
ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name)
|
||||
|
||||
# NOTE(review): presumably the value stored in xmlSAXHandler.initialized
# to mark a SAX2 handler - confirm against the libxml2 documentation.
cdef int XML_SAX2_MAGIC
|
||||
|
||||
# Parser input and SAX handler structs, declared from libxml/tree.h.
cdef extern from "libxml/tree.h":
|
||||
# Input descriptor: a line number, a length, and base/cur/end pointers
# into the character data.
ctypedef struct xmlParserInput:
|
||||
int line
|
||||
int length
|
||||
const_xmlChar* base
|
||||
const_xmlChar* cur
|
||||
const_xmlChar* end
|
||||
|
||||
# I/O buffer: an opaque context plus the read and close callbacks.
ctypedef struct xmlParserInputBuffer:
|
||||
void* context
|
||||
xmlInputReadCallback readcallback
|
||||
xmlInputCloseCallback closecallback
|
||||
|
||||
# Declared without any fields (body is just "pass").
ctypedef struct xmlSAXHandlerV1:
|
||||
# same as xmlSAXHandler, but without namespaces
|
||||
pass
|
||||
|
||||
# Table of SAX callback slots, one per callback type, followed by an
# "initialized" int, a structured error callback and a private pointer.
ctypedef struct xmlSAXHandler:
|
||||
internalSubsetSAXFunc internalSubset
|
||||
startElementNsSAX2Func startElementNs
|
||||
endElementNsSAX2Func endElementNs
|
||||
startElementSAXFunc startElement
|
||||
endElementSAXFunc endElement
|
||||
charactersSAXFunc characters
|
||||
cdataBlockSAXFunc cdataBlock
|
||||
referenceSAXFunc reference
|
||||
commentSAXFunc comment
|
||||
processingInstructionSAXFunc processingInstruction
|
||||
startDocumentSAXFunc startDocument
|
||||
endDocumentSAXFunc endDocument
|
||||
int initialized
|
||||
xmlStructuredErrorFunc serror
|
||||
void* _private
|
||||
|
||||
|
||||
# Single declaration from libxml/SAX2.h; the whole block is marked nogil.
# xmlSAX2StartDocument takes an untyped context pointer.
cdef extern from "libxml/SAX2.h" nogil:
|
||||
cdef void xmlSAX2StartDocument(void* ctxt)
|
||||
|
||||
|
||||
# Single declaration from libxml/xmlIO.h; the whole block is marked nogil.
# The allocator takes the encoding as a plain int and returns a buffer pointer.
cdef extern from "libxml/xmlIO.h" nogil:
|
||||
cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc)
|
||||
|
||||
|
||||
# Second extern block for libxml/parser.h: dictionary helpers, SAX option
# constants, parser state/option enums, the parser context and parse functions.
cdef extern from "libxml/parser.h":
|
||||
|
||||
# xmlDict create / create-sub / free / reference functions, all nogil.
# NOTE(review): these are declared under parser.h here; presumably they
# are reachable through that header's own includes - confirm.
cdef xmlDict* xmlDictCreate() nogil
|
||||
cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) nogil
|
||||
cdef void xmlDictFree(xmlDict* sub) nogil
|
||||
cdef int xmlDictReference(xmlDict* dict) nogil
|
||||
|
||||
cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes
|
||||
cdef int XML_SKIP_IDS # SAX option for not building an XML ID dict
|
||||
|
||||
# Parser input states: XML_PARSER_EOF is -1, the remaining states are
# numbered consecutively from 0 (START) to 16 (PUBLIC_LITERAL).
ctypedef enum xmlParserInputState:
|
||||
XML_PARSER_EOF = -1 # nothing is to be parsed
|
||||
XML_PARSER_START = 0 # nothing has been parsed
|
||||
XML_PARSER_MISC = 1 # Misc* before internal subset
|
||||
XML_PARSER_PI = 2 # Within a processing instruction
|
||||
XML_PARSER_DTD = 3 # within some DTD content
|
||||
XML_PARSER_PROLOG = 4 # Misc* after internal subset
|
||||
XML_PARSER_COMMENT = 5 # within a comment
|
||||
XML_PARSER_START_TAG = 6 # within a start tag
|
||||
XML_PARSER_CONTENT = 7 # within the content
|
||||
XML_PARSER_CDATA_SECTION = 8 # within a CDATA section
|
||||
XML_PARSER_END_TAG = 9 # within a closing tag
|
||||
XML_PARSER_ENTITY_DECL = 10 # within an entity declaration
|
||||
XML_PARSER_ENTITY_VALUE = 11 # within an entity value in a decl
|
||||
XML_PARSER_ATTRIBUTE_VALUE = 12 # within an attribute value
|
||||
XML_PARSER_SYSTEM_LITERAL = 13 # within a SYSTEM value
|
||||
XML_PARSER_EPILOG = 14 # the Misc* after the last end tag
|
||||
XML_PARSER_IGNORE = 15 # within an IGNORED section
|
||||
XML_PARSER_PUBLIC_LITERAL = 16 # within a PUBLIC value
|
||||
|
||||
|
||||
# Fields of the xmlParserCtxt struct. Flag-like members are typed bint,
# codes and counters are int; instate uses the xmlParserInputState enum and
# lastError embeds an xmlError by value.
ctypedef struct xmlParserCtxt:
|
||||
xmlDoc* myDoc
|
||||
xmlDict* dict
|
||||
int dictNames
|
||||
void* _private
|
||||
bint wellFormed
|
||||
bint recovery
|
||||
int options
|
||||
bint disableSAX
|
||||
int errNo
|
||||
xmlParserInputState instate
|
||||
bint replaceEntities
|
||||
int loadsubset # != 0 if enabled, int value == why
|
||||
bint validate
|
||||
xmlError lastError
|
||||
xmlNode* node
|
||||
xmlSAXHandler* sax
|
||||
void* userData
|
||||
int* spaceTab
|
||||
int spaceMax
|
||||
bint html
|
||||
bint progressive
|
||||
int inSubset
|
||||
int charset
|
||||
xmlParserInput* input
|
||||
|
||||
# Parser option flags. Each value is a distinct power of two (1 .. 4194304),
# so several options can be combined into a single int.
# NOTE(review): presumably these feed the "options" int taken by
# xmlCtxtUseOptions and the xmlCtxtRead* functions - confirm.
ctypedef enum xmlParserOption:
|
||||
XML_PARSE_RECOVER = 1 # recover on errors
|
||||
XML_PARSE_NOENT = 2 # substitute entities
|
||||
XML_PARSE_DTDLOAD = 4 # load the external subset
|
||||
XML_PARSE_DTDATTR = 8 # default DTD attributes
|
||||
XML_PARSE_DTDVALID = 16 # validate with the DTD
|
||||
XML_PARSE_NOERROR = 32 # suppress error reports
|
||||
XML_PARSE_NOWARNING = 64 # suppress warning reports
|
||||
XML_PARSE_PEDANTIC = 128 # pedantic error reporting
|
||||
XML_PARSE_NOBLANKS = 256 # remove blank nodes
|
||||
XML_PARSE_SAX1 = 512 # use the SAX1 interface internally
|
||||
XML_PARSE_XINCLUDE = 1024 # Implement XInclude substitution
|
||||
XML_PARSE_NONET = 2048 # Forbid network access
|
||||
XML_PARSE_NODICT = 4096 # Do not reuse the context dictionary
|
||||
XML_PARSE_NSCLEAN = 8192 # remove redundant namespace declarations
|
||||
XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes
|
||||
XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes
|
||||
# libxml2 2.6.21+ only:
|
||||
XML_PARSE_COMPACT = 65536 # compact small text nodes
|
||||
# libxml2 2.7.0+ only:
|
||||
XML_PARSE_OLD10 = 131072 # parse using XML-1.0 before update 5
|
||||
XML_PARSE_NOBASEFIX = 262144 # do not fixup XINCLUDE xml:base uris
|
||||
XML_PARSE_HUGE = 524288 # relax any hardcoded limit from the parser
|
||||
# libxml2 2.7.3+ only:
|
||||
XML_PARSE_OLDSAX = 1048576 # parse using SAX2 interface before 2.7.0
|
||||
# libxml2 2.8.0+ only:
|
||||
XML_PARSE_IGNORE_ENC = 2097152 # ignore internal document encoding hint
|
||||
# libxml2 2.9.0+ only:
|
||||
XML_PARSE_BIG_LINES = 4194304 # Store big line numbers in text PSVI field
|
||||
|
||||
# Parser init/cleanup, parser context management and the xmlCtxtRead*
# family. Every declaration in this group is nogil.
cdef void xmlInitParser() nogil
|
||||
cdef void xmlCleanupParser() nogil
|
||||
|
||||
cdef int xmlLineNumbersDefault(int onoff) nogil
|
||||
cdef xmlParserCtxt* xmlNewParserCtxt() nogil
|
||||
cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt,
|
||||
xmlParserInputBuffer* input,
|
||||
int enc) nogil
|
||||
cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
|
||||
cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
cdef void xmlCtxtReset(xmlParserCtxt* ctxt) nogil
|
||||
cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) nogil
|
||||
# Takes one chunk with its size plus a "terminate" int.
cdef int xmlParseChunk(xmlParserCtxt* ctxt,
|
||||
char* chunk, int size, int terminate) nogil
|
||||
# The four xmlCtxtRead* variants differ in their input source (string,
# file name, I/O callbacks, memory buffer); each takes an encoding and an
# options int and returns an xmlDoc*.
cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt,
|
||||
char* cur, char* URL, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt,
|
||||
char* filename, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt,
|
||||
xmlInputReadCallback ioread,
|
||||
xmlInputCloseCallback ioclose,
|
||||
void* ioctx,
|
||||
char* URL, char* encoding,
|
||||
int options) nogil
|
||||
cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt,
|
||||
char* buffer, int size,
|
||||
char* filename, const_char* encoding,
|
||||
int options) nogil
|
||||
|
||||
# iterparse:
# Push-parser creation and reset. Both functions take an initial chunk
# with its size and a file name; reset additionally takes an encoding.
|
||||
|
||||
cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax,
|
||||
void* user_data,
|
||||
char* chunk,
|
||||
int size,
|
||||
char* filename) nogil
|
||||
|
||||
cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt,
|
||||
char* chunk,
|
||||
int size,
|
||||
char* filename,
|
||||
char* encoding) nogil
|
||||
|
||||
# entity loaders:
# xmlExternalEntityLoader is the callback type (URL, ID and parser context
# in, xmlParserInput* out); the two functions get and set such a callback.
|
||||
|
||||
ctypedef xmlParserInput* (*xmlExternalEntityLoader)(
|
||||
const_char * URL, const_char * ID, xmlParserCtxt* context) nogil
|
||||
cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() nogil
|
||||
cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) nogil
|
||||
|
||||
# DTDs:
# Two functions returning xmlDtd*: one takes external/system identifiers,
# the other a SAX handler, an input buffer and an encoding int.
|
||||
|
||||
cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) nogil
|
||||
cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax,
|
||||
xmlParserInputBuffer* input,
|
||||
int enc) nogil
|
||||
|
||||
# Input stream helpers declared from libxml/parserInternals.h.
# Note: xmlNewInputStream is the only declaration here without nogil.
cdef extern from "libxml/parserInternals.h":
|
||||
cdef xmlParserInput* xmlNewInputStream(xmlParserCtxt* ctxt)
|
||||
cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt,
|
||||
char* buffer) nogil
|
||||
cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt,
|
||||
char* filename) nogil
|
||||
cdef void xmlFreeInputStream(xmlParserInput* input) nogil
|
||||
cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) nogil
|
||||
35
lib/python3.5/site-packages/lxml/includes/xmlschema.pxd
Normal file
35
lib/python3.5/site-packages/lxml/includes/xmlschema.pxd
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from lxml.includes.tree cimport xmlDoc
|
||||
from lxml.includes.xmlparser cimport xmlSAXHandler
|
||||
from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
|
||||
|
||||
# XML Schema declarations from libxml/xmlschemas.h.
# The four struct types are declared without fields, so they are only
# usable through pointers.
cdef extern from "libxml/xmlschemas.h":
|
||||
ctypedef struct xmlSchema
|
||||
ctypedef struct xmlSchemaParserCtxt
|
||||
|
||||
ctypedef struct xmlSchemaSAXPlugStruct
|
||||
ctypedef struct xmlSchemaValidCtxt
|
||||
|
||||
# Only one validation option is declared.
ctypedef enum xmlSchemaValidOption:
|
||||
XML_SCHEMA_VAL_VC_I_CREATE = 1
|
||||
|
||||
cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil
|
||||
# The two error-handler setters are declared without nogil.
cdef void xmlSchemaSetParserStructuredErrors(xmlSchemaParserCtxt* ctxt,
|
||||
xmlStructuredErrorFunc serror, void *ctx)
|
||||
cdef void xmlSchemaSetValidStructuredErrors(xmlSchemaValidCtxt* ctxt,
|
||||
xmlStructuredErrorFunc serror, void *ctx)
|
||||
|
||||
cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil
|
||||
cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil
|
||||
cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil
|
||||
cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil
|
||||
cdef void xmlSchemaFree(xmlSchema* schema) nogil
|
||||
cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil
|
||||
cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil
|
||||
cdef int xmlSchemaSetValidOptions(xmlSchemaValidCtxt* ctxt,
|
||||
int options) nogil
|
||||
|
||||
# xmlSchemaSAXPlug takes pointers to a SAX handler pointer and a data
# pointer and returns a plug struct, which xmlSchemaSAXUnplug accepts.
cdef xmlSchemaSAXPlugStruct* xmlSchemaSAXPlug(xmlSchemaValidCtxt* ctxt,
|
||||
xmlSAXHandler** sax,
|
||||
void** data) nogil
|
||||
# Unlike most functions above, the next two are declared without nogil.
cdef int xmlSchemaSAXUnplug(xmlSchemaSAXPlugStruct* sax_plug)
|
||||
cdef int xmlSchemaIsValid(xmlSchemaValidCtxt* ctxt)
|
||||
135
lib/python3.5/site-packages/lxml/includes/xpath.pxd
Normal file
135
lib/python3.5/site-packages/lxml/includes/xpath.pxd
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
from lxml.includes cimport tree
|
||||
from lxml.includes cimport xmlerror
|
||||
|
||||
from libc.string cimport const_char
|
||||
from lxml.includes.tree cimport xmlChar, const_xmlChar
|
||||
|
||||
# XPath declarations from libxml/xpath.h.
cdef extern from "libxml/xpath.h":
|
||||
# Type tags for XPath objects, numbered consecutively from 0 to 9.
ctypedef enum xmlXPathObjectType:
|
||||
XPATH_UNDEFINED = 0
|
||||
XPATH_NODESET = 1
|
||||
XPATH_BOOLEAN = 2
|
||||
XPATH_NUMBER = 3
|
||||
XPATH_STRING = 4
|
||||
XPATH_POINT = 5
|
||||
XPATH_RANGE = 6
|
||||
XPATH_LOCATIONSET = 7
|
||||
XPATH_USERS = 8
|
||||
XPATH_XSLT_TREE = 9
|
||||
|
||||
# XPath error codes: XPATH_EXPRESSION_OK is 0 and the error values are
# numbered consecutively from 1 to 22, including three XPTR_* codes (16-18).
ctypedef enum xmlXPathError:
|
||||
XPATH_EXPRESSION_OK = 0
|
||||
XPATH_NUMBER_ERROR = 1
|
||||
XPATH_UNFINISHED_LITERAL_ERROR = 2
|
||||
XPATH_START_LITERAL_ERROR = 3
|
||||
XPATH_VARIABLE_REF_ERROR = 4
|
||||
XPATH_UNDEF_VARIABLE_ERROR = 5
|
||||
XPATH_INVALID_PREDICATE_ERROR = 6
|
||||
XPATH_EXPR_ERROR = 7
|
||||
XPATH_UNCLOSED_ERROR = 8
|
||||
XPATH_UNKNOWN_FUNC_ERROR = 9
|
||||
XPATH_INVALID_OPERAND = 10
|
||||
XPATH_INVALID_TYPE = 11
|
||||
XPATH_INVALID_ARITY = 12
|
||||
XPATH_INVALID_CTXT_SIZE = 13
|
||||
XPATH_INVALID_CTXT_POSITION = 14
|
||||
XPATH_MEMORY_ERROR = 15
|
||||
XPTR_SYNTAX_ERROR = 16
|
||||
XPTR_RESOURCE_ERROR = 17
|
||||
XPTR_SUB_RESOURCE_ERROR = 18
|
||||
XPATH_UNDEF_PREFIX_ERROR = 19
|
||||
XPATH_ENCODING_ERROR = 20
|
||||
XPATH_INVALID_CHAR_ERROR = 21
|
||||
XPATH_INVALID_CTXT = 22
|
||||
|
||||
# Node set: two ints (nodeNr, nodeMax) and an array of node pointers.
# NOTE(review): presumably nodeNr is the used count and nodeMax the
# allocated capacity of nodeTab - confirm against the libxml2 documentation.
ctypedef struct xmlNodeSet:
|
||||
int nodeNr
|
||||
int nodeMax
|
||||
tree.xmlNode** nodeTab
|
||||
|
||||
# XPath value: a type tag plus node set, boolean, float and string members.
# NOTE(review): presumably "type" selects which *val member is meaningful.
ctypedef struct xmlXPathObject:
|
||||
xmlXPathObjectType type
|
||||
xmlNodeSet* nodesetval
|
||||
bint boolval
|
||||
double floatval
|
||||
xmlChar* stringval
|
||||
|
||||
# Evaluation context: document, node, dict and namespace hash pointers,
# function name/URI strings, and error callback, last error and user data.
ctypedef struct xmlXPathContext:
|
||||
tree.xmlDoc* doc
|
||||
tree.xmlNode* node
|
||||
tree.xmlDict* dict
|
||||
tree.xmlHashTable* nsHash
|
||||
const_xmlChar* function
|
||||
const_xmlChar* functionURI
|
||||
xmlerror.xmlStructuredErrorFunc error
|
||||
xmlerror.xmlError lastError
|
||||
void* userData
|
||||
|
||||
# Parser context: points at its xmlXPathContext and carries a value,
# an ancestor node and an int error field.
ctypedef struct xmlXPathParserContext:
|
||||
xmlXPathContext* context
|
||||
xmlXPathObject* value
|
||||
tree.xmlNode* ancestor
|
||||
int error
|
||||
|
||||
# Declared without fields; only usable through pointers.
ctypedef struct xmlXPathCompExpr
|
||||
|
||||
# Extension function signature: receives the parser context and an
# argument count, returns nothing.
ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) nogil
|
||||
# Lookup callback: maps a (name, ns_uri) pair to an xmlXPathFunction.
ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt,
|
||||
const_xmlChar* name,
|
||||
const_xmlChar* ns_uri) nogil
|
||||
|
||||
# Context creation, expression evaluation/compilation and the matching
# free functions; every declaration in this group is nogil.
cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) nogil
|
||||
cdef xmlXPathObject* xmlXPathEvalExpression(const_xmlChar* str,
|
||||
xmlXPathContext* ctxt) nogil
|
||||
cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp,
|
||||
xmlXPathContext* ctxt) nogil
|
||||
cdef xmlXPathCompExpr* xmlXPathCompile(const_xmlChar* str) nogil
|
||||
cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt,
|
||||
const_xmlChar* str) nogil
|
||||
cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) nogil
|
||||
cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) nogil
|
||||
cdef void xmlXPathFreeObject(xmlXPathObject* obj) nogil
|
||||
cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt,
|
||||
const_xmlChar* prefix, const_xmlChar* ns_uri) nogil
|
||||
|
||||
cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) nogil
|
||||
cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) nogil
|
||||
|
||||
|
||||
# Declarations from libxml/xpathInternals.h: function and variable
# registration, the value stack, object constructors and node set helpers.
# Every declaration in this block is nogil.
cdef extern from "libxml/xpathInternals.h":
|
||||
# Each Register* function has a plain variant and an *NS variant that
# additionally takes a namespace URI.
cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt,
|
||||
const_xmlChar* name,
|
||||
xmlXPathFunction f) nogil
|
||||
cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt,
|
||||
const_xmlChar* name,
|
||||
const_xmlChar* ns_uri,
|
||||
xmlXPathFunction f) nogil
|
||||
cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt,
|
||||
xmlXPathFuncLookupFunc f,
|
||||
void *funcCtxt) nogil
|
||||
cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt,
|
||||
const_xmlChar* name,
|
||||
xmlXPathObject* value) nogil
|
||||
cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt,
|
||||
const_xmlChar* name,
|
||||
const_xmlChar* ns_uri,
|
||||
xmlXPathObject* value) nogil
|
||||
cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) nogil
|
||||
cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) nogil
|
||||
# Pop/push an xmlXPathObject on the parser context.
cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) nogil
|
||||
cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) nogil
|
||||
|
||||
# Constructors returning a new xmlXPathObject* from a C value.
# NOTE(review): presumably the New* forms copy their argument while the
# Wrap* forms take ownership of it - confirm against the libxml2 docs.
cdef xmlXPathObject* xmlXPathNewCString(const_char *val) nogil
|
||||
cdef xmlXPathObject* xmlXPathWrapCString(const_char * val) nogil
|
||||
cdef xmlXPathObject* xmlXPathNewString(const_xmlChar *val) nogil
|
||||
cdef xmlXPathObject* xmlXPathWrapString(const_xmlChar * val) nogil
|
||||
cdef xmlXPathObject* xmlXPathNewFloat(double val) nogil
|
||||
cdef xmlXPathObject* xmlXPathNewBoolean(int val) nogil
|
||||
cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) nogil
|
||||
cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) nogil
|
||||
cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur,
|
||||
tree.xmlNode* val) nogil
|
||||
cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur,
|
||||
tree.xmlNode* val) nogil
|
||||
cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) nogil
|
||||
# Takes the error as a plain int.
cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) nogil
|
||||
176
lib/python3.5/site-packages/lxml/includes/xslt.pxd
Normal file
176
lib/python3.5/site-packages/lxml/includes/xslt.pxd
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
from lxml.includes.tree cimport xmlDoc, xmlNode, xmlDict, xmlChar, const_xmlChar
|
||||
from lxml.includes.xpath cimport xmlXPathContext, xmlXPathFunction
|
||||
|
||||
from libc.string cimport const_char
|
||||
|
||||
# Integer globals declared in libxslt/xslt.h.
cdef extern from "libxslt/xslt.h":
    cdef int xsltLibxsltVersion
    cdef int xsltMaxDepth
|
||||
|
||||
# Integer version constant declared in libxslt/xsltconfig.h.
cdef extern from "libxslt/xsltconfig.h":
    cdef int LIBXSLT_VERSION
|
||||
|
||||
# Types and stylesheet functions declared in libxslt/xsltInternals.h.
# Only the struct fields listed here are visible to Cython code.
cdef extern from "libxslt/xsltInternals.h":
    ctypedef enum xsltTransformState:
        XSLT_STATE_OK       # 0
        XSLT_STATE_ERROR    # 1
        XSLT_STATE_STOPPED  # 2

    ctypedef struct xsltDocument:
        xmlDoc* doc

    ctypedef struct xsltStylesheet:
        xmlChar* encoding
        xmlDoc* doc
        int errors

    ctypedef struct xsltTransformContext:
        xsltStylesheet* style
        xmlXPathContext* xpathCtxt
        xsltDocument* document
        void* _private
        xmlDict* dict
        int profile
        xmlNode* node
        xmlDoc* output
        xmlNode* insert
        xmlNode* inst
        xsltTransformState state

    # opaque types: declared without fields, so usable through pointers only
    ctypedef struct xsltStackElem

    ctypedef struct xsltTemplate

    cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) nogil
    cdef void xsltFreeStylesheet(xsltStylesheet* sheet) nogil
|
||||
|
||||
# Extension function/element registration API from libxslt/extensions.h.
cdef extern from "libxslt/extensions.h":
    # callback signature expected by xsltRegisterExtElement()
    ctypedef void (*xsltTransformFunction)(
        xsltTransformContext* ctxt,
        xmlNode* context_node,
        xmlNode* inst,
        void* precomp_unused) nogil

    cdef int xsltRegisterExtFunction(
        xsltTransformContext* ctxt,
        const_xmlChar* name,
        const_xmlChar* URI,
        xmlXPathFunction function) nogil
    cdef int xsltRegisterExtModuleFunction(
        const_xmlChar* name, const_xmlChar* URI,
        xmlXPathFunction function) nogil
    # note: this one is declared without ``nogil``
    cdef int xsltUnregisterExtModuleFunction(
        const_xmlChar* name, const_xmlChar* URI)
    cdef xmlXPathFunction xsltExtModuleFunctionLookup(
        const_xmlChar* name, const_xmlChar* URI) nogil
    cdef int xsltRegisterExtPrefix(
        xsltStylesheet* style,
        const_xmlChar* prefix, const_xmlChar* URI) nogil
    cdef int xsltRegisterExtElement(
        xsltTransformContext* ctxt,
        const_xmlChar* name, const_xmlChar* URI,
        xsltTransformFunction function) nogil
|
||||
|
||||
# Document loader hook declarations from libxslt/documents.h.
cdef extern from "libxslt/documents.h":
    ctypedef enum xsltLoadType:
        XSLT_LOAD_START
        XSLT_LOAD_STYLESHEET
        XSLT_LOAD_DOCUMENT

    # signature of a document loader callback
    ctypedef xmlDoc* (*xsltDocLoaderFunc)(
        const_xmlChar* URI, xmlDict* dict,
        int options,
        void* ctxt,
        xsltLoadType type) nogil

    # global variable of loader type, and the function taking a loader
    cdef xsltDocLoaderFunc xsltDocDefaultLoader
    cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) nogil
|
||||
|
||||
# Transformation entry points declared in libxslt/transform.h.
# Every function in this block is declared ``nogil``.
cdef extern from "libxslt/transform.h":
    cdef xmlDoc* xsltApplyStylesheet(
        xsltStylesheet* style, xmlDoc* doc,
        const_char** params) nogil
    cdef xmlDoc* xsltApplyStylesheetUser(
        xsltStylesheet* style, xmlDoc* doc,
        const_char** params, const_char* output,
        void* profile,
        xsltTransformContext* context) nogil
    cdef void xsltProcessOneNode(
        xsltTransformContext* ctxt,
        xmlNode* contextNode,
        xsltStackElem* params) nogil
    cdef xsltTransformContext* xsltNewTransformContext(
        xsltStylesheet* style,
        xmlDoc* doc) nogil
    cdef void xsltFreeTransformContext(xsltTransformContext* context) nogil
    cdef void xsltApplyOneTemplate(
        xsltTransformContext* ctxt,
        xmlNode* contextNode, xmlNode* list,
        xsltTemplate* templ,
        xsltStackElem* params) nogil
|
||||
|
||||
# Result serialisation and error handler declarations from
# libxslt/xsltutils.h.
cdef extern from "libxslt/xsltutils.h":
    cdef int xsltSaveResultToString(
        xmlChar** doc_txt_ptr,
        int* doc_txt_len,
        xmlDoc* result,
        xsltStylesheet* style) nogil

    # the two setters take a printf-style variadic handler; only the handler
    # type of xsltSetTransformErrorFunc() is itself marked ``nogil``
    cdef void xsltSetGenericErrorFunc(
        void* ctxt,
        void (*handler)(void* ctxt, char* msg, ...)) nogil
    cdef void xsltSetTransformErrorFunc(
        xsltTransformContext*,
        void* ctxt,
        void (*handler)(void* ctxt, char* msg, ...) nogil) nogil

    # the remaining two functions are declared without ``nogil``
    cdef void xsltTransformError(
        xsltTransformContext* ctxt,
        xsltStylesheet* style,
        xmlNode* node, char* msg, ...)
    cdef void xsltSetCtxtParseOptions(
        xsltTransformContext* ctxt, int options)
|
||||
|
||||
# Security preference API declared via libxslt/security.h.
cdef extern from "libxslt/security.h":
    # opaque type: declared without fields, so usable through pointers only
    ctypedef struct xsltSecurityPrefs

    ctypedef enum xsltSecurityOption:
        XSLT_SECPREF_READ_FILE = 1
        XSLT_SECPREF_WRITE_FILE = 2
        XSLT_SECPREF_CREATE_DIRECTORY = 3
        XSLT_SECPREF_READ_NETWORK = 4
        XSLT_SECPREF_WRITE_NETWORK = 5

    # callback signature; xsltSecurityForbid() and xsltSecurityAllow() below
    # are declared with exactly this signature
    ctypedef int (*xsltSecurityCheck)(
        xsltSecurityPrefs* sec,
        xsltTransformContext* ctxt,
        char* value) nogil

    cdef xsltSecurityPrefs* xsltNewSecurityPrefs() nogil
    cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) nogil
    cdef int xsltSecurityForbid(
        xsltSecurityPrefs* sec,
        xsltTransformContext* ctxt,
        char* value) nogil
    cdef int xsltSecurityAllow(
        xsltSecurityPrefs* sec,
        xsltTransformContext* ctxt,
        char* value) nogil
    cdef int xsltSetSecurityPrefs(
        xsltSecurityPrefs* sec,
        xsltSecurityOption option,
        xsltSecurityCheck func) nogil
    cdef xsltSecurityCheck xsltGetSecurityPrefs(
        xsltSecurityPrefs* sec,
        xsltSecurityOption option) nogil
    cdef int xsltSetCtxtSecurityPrefs(
        xsltSecurityPrefs* sec,
        xsltTransformContext* ctxt) nogil
    cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) nogil
|
||||
|
||||
# User parameter functions from libxslt/variables.h.
# Neither function is declared ``nogil``.
cdef extern from "libxslt/variables.h":
    cdef int xsltQuoteUserParams(
        xsltTransformContext* ctxt,
        const_char** params)
    cdef int xsltQuoteOneUserParam(
        xsltTransformContext* ctxt,
        const_xmlChar* name,
        const_xmlChar* value)
|
||||
|
||||
# Namespace constants and extra registrations from libxslt/extra.h.
cdef extern from "libxslt/extra.h":
    # namespace URI strings
    const_xmlChar* XSLT_LIBXSLT_NAMESPACE
    const_xmlChar* XSLT_XALAN_NAMESPACE
    const_xmlChar* XSLT_SAXON_NAMESPACE
    const_xmlChar* XSLT_XT_NAMESPACE

    cdef xmlXPathFunction xsltFunctionNodeSet
    cdef void xsltRegisterAllExtras() nogil
|
||||
|
||||
# EXSLT registration API declared in libexslt/exslt.h.
cdef extern from "libexslt/exslt.h":
    cdef void exsltRegisterAll() nogil

    # libexslt 1.1.25+
    const_xmlChar* EXSLT_DATE_NAMESPACE
    const_xmlChar* EXSLT_SETS_NAMESPACE
    const_xmlChar* EXSLT_MATH_NAMESPACE
    const_xmlChar* EXSLT_STRINGS_NAMESPACE

    # per-module registration on an XPath context (declared without ``nogil``)
    cdef int exsltDateXpathCtxtRegister(
        xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltSetsXpathCtxtRegister(
        xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltMathXpathCtxtRegister(
        xmlXPathContext* ctxt, const_xmlChar* prefix)
    cdef int exsltStrXpathCtxtRegister(
        xmlXPathContext* ctxt, const_xmlChar* prefix)
|
||||
|
||||
334
lib/python3.5/site-packages/lxml/isoschematron/__init__.py
Normal file
334
lib/python3.5/site-packages/lxml/isoschematron/__init__.py
Normal file
|
|
@ -0,0 +1,334 @@
|
|||
"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
|
||||
of the pure-xslt 'skeleton' implementation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os.path
|
||||
from lxml import etree as _etree # due to validator __init__ signature
|
||||
|
||||
|
||||
# some compat stuff, borrowed from lxml.html: when the Python 2 builtins
# ``unicode`` / ``basestring`` are missing, alias them to ``str``
try:
    unicode
except NameError:
    unicode = str  # Python 3
try:
    basestring
except NameError:
    basestring = str  # Python 3


__all__ = [
    'extract_xsd', 'extract_rng', 'iso_dsdl_include',
    'iso_abstract_expand', 'iso_svrl_for_xslt1',
    'svrl_validation_errors', 'schematron_schema_valid',
    'stylesheet_params', 'Schematron',
]


# some namespaces
#FIXME: Maybe lxml should provide a dedicated place for common namespace
#FIXME: definitions?
XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
SVRL_NS = "http://purl.oclc.org/dsdl/svrl"


# some helpers: root tags in '{namespace}localname' notation and the
# directory holding the bundled xsl/rng resources
_schematron_root = '{' + SCHEMATRON_NS + '}schema'
_xml_schema_root = '{' + XML_SCHEMA_NS + '}schema'
_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')


# the iso-schematron skeleton implementation steps aka xsl transformations;
# each stylesheet is parsed and compiled once, at import time
extract_xsd = _etree.XSLT(
    _etree.parse(os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
extract_rng = _etree.XSLT(
    _etree.parse(os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
iso_dsdl_include = _etree.XSLT(
    _etree.parse(os.path.join(
        _resources_dir, 'xsl', 'iso-schematron-xslt1',
        'iso_dsdl_include.xsl')))
iso_abstract_expand = _etree.XSLT(
    _etree.parse(os.path.join(
        _resources_dir, 'xsl', 'iso-schematron-xslt1',
        'iso_abstract_expand.xsl')))
iso_svrl_for_xslt1 = _etree.XSLT(
    _etree.parse(os.path.join(
        _resources_dir, 'xsl', 'iso-schematron-xslt1',
        'iso_svrl_for_xslt1.xsl')))


# svrl result accessors
svrl_validation_errors = _etree.XPath(
    '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})


# RelaxNG validator for schematron schemas
schematron_schema_valid = _etree.RelaxNG(
    _etree.parse(os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')))
|
||||
|
||||
|
||||
def stylesheet_params(**kwargs):
    """Convert keyword args to a dictionary of stylesheet parameters.

    XSL stylesheet parameters must be XPath expressions, i.e.:

     * string expressions, like "'5'"
     * simple (number) expressions, like "5"
     * valid XPath expressions, like "/a/b/text()"

    Each keyword argument is converted by the first matching rule:

     * ``None`` raises TypeError.
     * A string is wrapped with ``XSLT.strparam()``.
     * An ``XPath`` object is passed through unchanged.
     * Anything else is converted to its (unicode) string form.

    Returns a new dict mapping each keyword name to its converted value.
    """
    params = {}
    for name, value in kwargs.items():
        if value is None:
            raise TypeError('None not allowed as a stylesheet parameter')
        if isinstance(value, basestring):
            params[name] = _etree.XSLT.strparam(value)
        elif isinstance(value, _etree.XPath):
            params[name] = value
        else:
            params[name] = unicode(value)
    return params
|
||||
|
||||
|
||||
# helper function for use in Schematron __init__
|
||||
def _stylesheet_param_dict(paramsDict, kwargsDict):
    """Merge two parameter mappings and wrap them as stylesheet arguments.

    The result starts as a copy of ``paramsDict``, so the caller's dict
    (possibly a shared default argument) is never modified.  Entries of
    ``kwargsDict`` override it, except those whose value is None, which are
    ignored.  The merged mapping is then converted by ``stylesheet_params()``.
    """
    merged = dict(paramsDict)
    merged.update(
        (name, value) for name, value in kwargsDict.items()
        if value is not None)  # None values do not override
    return stylesheet_params(**merged)
|
||||
|
||||
|
||||
class Schematron(_etree._Validator):
    """An ISO Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.
    The main idea is to use the capabilities of XPath to put restrictions on
    the structure and the content of XML documents.

    The standard behaviour is to fail on ``failed-assert`` findings only
    (``ASSERTS_ONLY``).  To change this, you can either pass a report filter
    function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
    or a custom ``XPath`` object), or subclass isoschematron.Schematron for
    complete control of the validation process.

    Built on the Schematron language 'reference' skeleton pure-xslt
    implementation, the validator is created as an XSLT 1.0 stylesheet using
    these steps:

     0) (Extract from XML Schema or RelaxNG schema)
     1) Process inclusions
     2) Process abstract patterns
     3) Compile the schematron schema to XSLT

    The ``include`` and ``expand`` keyword arguments can be used to switch off
    steps 1) and 2).
    To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
    keyword arguments ``include_params``, ``expand_params`` or
    ``compile_params``.
    For convenience, the compile-step parameter ``phase`` is also exposed as a
    keyword argument ``phase``. This takes precedence if the parameter is also
    given in the parameter dictionary.

    If ``store_schematron`` is set to True, the (included-and-expanded)
    schematron document tree is stored and available through the ``schematron``
    property.
    If ``store_xslt`` is set to True, the validation XSLT document tree will be
    stored and can be retrieved through the ``validator_xslt`` property.
    With ``store_report`` set to True (default: False), the resulting validation
    report document gets stored and can be accessed as the ``validation_report``
    property.

    Here is a usage example::

      >>> from lxml import etree
      >>> from lxml.isoschematron import Schematron

      >>> schematron = Schematron(etree.XML('''
      ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
      ...   <pattern id="id_only_attribute">
      ...     <title>id is the only permitted attribute name</title>
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>'''),
      ... error_finder=Schematron.ASSERTS_AND_REPORTS)

      >>> xml = etree.XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      False

      >>> xml = etree.XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      True
    """

    # libxml2 error categorization for validation errors
    _domain = _etree.ErrorDomains.SCHEMATRONV
    _level = _etree.ErrorLevels.ERROR
    _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT

    # convenience definitions for common behaviours
    ASSERTS_ONLY = svrl_validation_errors  # Default
    ASSERTS_AND_REPORTS = _etree.XPath(
        '//svrl:failed-assert | //svrl:successful-report',
        namespaces={'svrl': SVRL_NS})

    def _extract(self, element):
        """Extract embedded schematron schema from non-schematron host schema.

        This method will only be called by __init__ if the given schema document
        is not a schematron schema by itself.
        Must return a schematron schema document tree or None.
        """
        schematron = None
        if element.tag == _xml_schema_root:
            schematron = self._extract_xsd(element)
        # Look the prefix up with .get(): a root element that is in no
        # namespace has prefix None and no matching nsmap entry, so plain
        # indexing would raise KeyError here instead of returning None and
        # letting __init__ report a SchematronParseError.
        elif element.nsmap.get(element.prefix) == RELAXNG_NS:
            # RelaxNG does not have a single unique root element
            schematron = self._extract_rng(element)
        return schematron

    # customization points
    # etree.XSLT objects that provide the extract, include, expand, compile
    # steps
    _extract_xsd = extract_xsd
    _extract_rng = extract_rng
    _include = iso_dsdl_include
    _expand = iso_abstract_expand
    _compile = iso_svrl_for_xslt1

    # etree.xpath object that determines input document validity when applied to
    # the svrl result report; must return a list of result elements (empty if
    # valid)
    _validation_errors = ASSERTS_ONLY

    # NOTE: the dict defaults below are shared between calls but are only ever
    # read (unpacked with ** or copied by _stylesheet_param_dict), never
    # mutated.
    def __init__(self, etree=None, file=None, include=True, expand=True,
                 include_params={}, expand_params={}, compile_params={},
                 store_schematron=False, store_xslt=False, store_report=False,
                 phase=None, error_finder=ASSERTS_ONLY):
        """Build the validating XSLT from a schema tree or file.

        ``etree`` may be an Element or an ElementTree; if it is None, ``file``
        is parsed instead.  The remaining keyword arguments are described in
        the class docstring.

        Raises ``SchematronParseError`` if obtaining the root element fails,
        if the document is neither a schematron schema nor a host schema that
        schematron rules can be extracted from, or if the resulting schema
        fails RelaxNG validation.  Raises ``ValueError`` if no root element
        was obtained.
        """
        super(Schematron, self).__init__()

        self._store_report = store_report
        self._schematron = None
        self._validator_xslt = None
        self._validation_report = None
        # only shadow the class-level default if a custom finder was given
        if error_finder is not self.ASSERTS_ONLY:
            self._validation_errors = error_finder

        # parse schema document, may be a schematron schema or an XML Schema or
        # a RelaxNG schema with embedded schematron rules
        root = None
        try:
            if etree is not None:
                if _etree.iselement(etree):
                    root = etree
                else:
                    root = etree.getroot()
            elif file is not None:
                root = _etree.parse(file).getroot()
        except Exception:
            raise _etree.SchematronParseError(
                "No tree or file given: %s" % sys.exc_info()[1])
        if root is None:
            raise ValueError("Empty tree")
        if root.tag == _schematron_root:
            schematron = root
        else:
            schematron = self._extract(root)
        if schematron is None:
            raise _etree.SchematronParseError(
                "Document is not a schematron schema or schematron-extractable")
        # perform the iso-schematron skeleton implementation steps to get a
        # validating xslt
        if include:
            schematron = self._include(schematron, **include_params)
        if expand:
            schematron = self._expand(schematron, **expand_params)
        if not schematron_schema_valid(schematron):
            raise _etree.SchematronParseError(
                "invalid schematron schema: %s" %
                schematron_schema_valid.error_log)
        if store_schematron:
            self._schematron = schematron
        # add new compile keyword args here if exposing them
        compile_kwargs = {'phase': phase}
        compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
        validator_xslt = self._compile(schematron, **compile_params)
        if store_xslt:
            self._validator_xslt = validator_xslt
        self._validator = _etree.XSLT(validator_xslt)

    def __call__(self, etree):
        """Validate doc using Schematron.

        Returns true if document is valid, false if not.  Each finding
        selected by the error finder is appended to the error log, serialised
        as XML text.
        """
        self._clear_error_log()
        result = self._validator(etree)
        if self._store_report:
            self._validation_report = result
        errors = self._validation_errors(result)
        if errors:
            # the input may be an Element or an ElementTree; either way the
            # document URL (if any) is used as file name in the log entries
            if _etree.iselement(etree):
                fname = etree.getroottree().docinfo.URL or '<file>'
            else:
                fname = etree.docinfo.URL or '<file>'
            for error in errors:
                # Does svrl report the line number, anywhere? Don't think so.
                self._append_log_message(
                    domain=self._domain, type=self._error_type,
                    level=self._level, line=0,
                    message=_etree.tostring(error, encoding='unicode'),
                    filename=fname)
            return False
        return True

    @property
    def schematron(self):
        """ISO-schematron schema document (None if object has been initialized
        with store_schematron=False).
        """
        return self._schematron

    @property
    def validator_xslt(self):
        """ISO-schematron skeleton implementation XSLT validator document (None
        if object has been initialized with store_xslt=False).
        """
        return self._validator_xslt

    @property
    def validation_report(self):
        """ISO-schematron validation result report (None if result-storing has
        been turned off).
        """
        return self._validation_report
|
||||
|
|
@ -0,0 +1,622 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
(c) International Organization for Standardization 2005.
|
||||
Permission to copy in any form is granted for use with conforming
|
||||
SGML systems and applications as defined in ISO 8879,
|
||||
provided this notice is included in all copies.
|
||||
-->
|
||||
<grammar ns="http://purl.oclc.org/dsdl/schematron" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
||||
<start>
|
||||
<ref name="schema"/>
|
||||
</start>
|
||||
<!-- Element declarations -->
|
||||
<define name="schema">
|
||||
<element name="schema">
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<optional>
|
||||
<attribute name="schemaVersion">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="defaultPhase">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="queryBinding">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<group>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<zeroOrMore>
|
||||
<ref name="ns"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="phase"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<ref name="pattern"/>
|
||||
</oneOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<optional>
|
||||
<ref name="diagnostics"/>
|
||||
</optional>
|
||||
</group>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="active">
|
||||
<element name="active">
|
||||
<attribute name="pattern">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="assert">
|
||||
<element name="assert">
|
||||
<attribute name="test">
|
||||
<ref name="exprValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="diagnostics">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="name"/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="diagnostic">
|
||||
<element name="diagnostic">
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="diagnostics">
|
||||
<element name="diagnostics">
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="diagnostic"/>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="dir">
|
||||
<element name="dir">
|
||||
<optional>
|
||||
<attribute name="value">
|
||||
<choice>
|
||||
<value>ltr</value>
|
||||
<value>rtl</value>
|
||||
</choice>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<text/>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="emph">
|
||||
<element name="emph">
|
||||
<text/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="extends">
|
||||
<element name="extends">
|
||||
<attribute name="rule">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="let">
|
||||
<element name="let">
|
||||
<attribute name="name">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<attribute name="value">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</attribute>
|
||||
</element>
|
||||
</define>
|
||||
<define name="name">
|
||||
<element name="name">
|
||||
<optional>
|
||||
<attribute name="path">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="ns">
|
||||
<element name="ns">
|
||||
<attribute name="uri">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
<attribute name="prefix">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<define name="p">
|
||||
<element name="p">
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="class">
|
||||
<ref name="classValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="icon">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="param">
|
||||
<element name="param">
|
||||
<attribute name="name">
|
||||
<ref name="nameValue"/>
|
||||
</attribute>
|
||||
<attribute name="value">
|
||||
<ref name="non-empty-string"/>
|
||||
</attribute>
|
||||
</element>
|
||||
</define>
|
||||
<define name="pattern">
|
||||
<element name="pattern">
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<choice>
|
||||
<group>
|
||||
<attribute name="abstract">
|
||||
<value>true</value>
|
||||
</attribute>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="rule"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
<group>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="rule"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
<group>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<attribute name="is-a">
|
||||
<data type="IDREF"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<ref name="title"/>
|
||||
</optional>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="param"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</group>
|
||||
</choice>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="phase">
|
||||
<element name="phase">
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<ref name="rich"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<group>
|
||||
<zeroOrMore>
|
||||
<ref name="p"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<zeroOrMore>
|
||||
<ref name="active"/>
|
||||
</zeroOrMore>
|
||||
</group>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="report">
|
||||
<element name="report">
|
||||
<attribute name="test">
|
||||
<ref name="exprValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="diagnostics">
|
||||
<data type="IDREFS"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="name"/>
|
||||
<ref name="value-of"/>
|
||||
<ref name="emph"/>
|
||||
<ref name="dir"/>
|
||||
<ref name="span"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="rule">
|
||||
<element name="rule">
|
||||
<optional>
|
||||
<attribute name="flag">
|
||||
<ref name="flagValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<ref name="rich"/>
|
||||
<ref name="linkable"/>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<zeroOrMore>
|
||||
<ref name="inclusion"/>
|
||||
</zeroOrMore>
|
||||
<choice>
|
||||
<group>
|
||||
<attribute name="abstract">
|
||||
<value>true</value>
|
||||
</attribute>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<choice>
|
||||
<ref name="assert"/>
|
||||
<ref name="report"/>
|
||||
<ref name="extends"/>
|
||||
</choice>
|
||||
</oneOrMore>
|
||||
</group>
|
||||
<group>
|
||||
<attribute name="context">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
<optional>
|
||||
<attribute name="id">
|
||||
<data type="ID"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="abstract">
|
||||
<value>false</value>
|
||||
</attribute>
|
||||
</optional>
|
||||
<zeroOrMore>
|
||||
<ref name="let"/>
|
||||
</zeroOrMore>
|
||||
<oneOrMore>
|
||||
<choice>
|
||||
<ref name="assert"/>
|
||||
<ref name="report"/>
|
||||
<ref name="extends"/>
|
||||
</choice>
|
||||
</oneOrMore>
|
||||
</group>
|
||||
</choice>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="span">
|
||||
<element name="span">
|
||||
<attribute name="class">
|
||||
<ref name="classValue"/>
|
||||
</attribute>
|
||||
<interleave>
|
||||
<ref name="foreign"/>
|
||||
<text/>
|
||||
</interleave>
|
||||
</element>
|
||||
</define>
|
||||
<define name="title">
|
||||
<element name="title">
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<text/>
|
||||
<ref name="dir"/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</element>
|
||||
</define>
|
||||
<define name="value-of">
|
||||
<element name="value-of">
|
||||
<attribute name="select">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
<ref name="foreign-empty"/>
|
||||
</element>
|
||||
</define>
|
||||
<!-- common declarations -->
|
||||
<define name="inclusion">
|
||||
<element name="include">
|
||||
<attribute name="href">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</element>
|
||||
</define>
|
||||
<define name="rich">
|
||||
<optional>
|
||||
<attribute name="icon">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="see">
|
||||
<ref name="uriValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="fpi">
|
||||
<ref name="fpiValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="xml:lang">
|
||||
<ref name="langValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="xml:space">
|
||||
<choice>
|
||||
<value>preserve</value>
|
||||
<value>default</value>
|
||||
</choice>
|
||||
</attribute>
|
||||
</optional>
|
||||
</define>
|
||||
<define name="linkable">
|
||||
<optional>
|
||||
<attribute name="role">
|
||||
<ref name="roleValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
<optional>
|
||||
<attribute name="subject">
|
||||
<ref name="pathValue"/>
|
||||
</attribute>
|
||||
</optional>
|
||||
</define>
|
||||
<define name="foreign">
|
||||
<ref name="foreign-attributes"/>
|
||||
<zeroOrMore>
|
||||
<ref name="foreign-element"/>
|
||||
</zeroOrMore>
|
||||
</define>
|
||||
<define name="foreign-empty">
|
||||
<ref name="foreign-attributes"/>
|
||||
</define>
|
||||
<define name="foreign-attributes">
|
||||
<zeroOrMore>
|
||||
<attribute>
|
||||
<anyName>
|
||||
<except>
|
||||
<nsName ns=""/>
|
||||
<nsName ns="http://www.w3.org/XML/1998/namespace"/>
|
||||
</except>
|
||||
</anyName>
|
||||
</attribute>
|
||||
</zeroOrMore>
|
||||
</define>
|
||||
<define name="foreign-element">
|
||||
<element>
|
||||
<anyName>
|
||||
<except>
|
||||
<nsName/>
|
||||
</except>
|
||||
</anyName>
|
||||
<zeroOrMore>
|
||||
<choice>
|
||||
<attribute>
|
||||
<anyName/>
|
||||
</attribute>
|
||||
<ref name="foreign-element"/>
|
||||
<ref name="schema"/>
|
||||
<text/>
|
||||
</choice>
|
||||
</zeroOrMore>
|
||||
</element>
|
||||
</define>
|
||||
<!-- Data types -->
|
||||
<define name="uriValue">
|
||||
<data type="anyURI"/>
|
||||
</define>
|
||||
<define name="pathValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="exprValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="fpiValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="langValue">
|
||||
<data type="language"/>
|
||||
</define>
|
||||
<define name="roleValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="flagValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="nameValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<!-- In the default query language binding, xsd:NCNAME -->
|
||||
<define name="classValue">
|
||||
<data type="string" datatypeLibrary=""/>
|
||||
</define>
|
||||
<define name="non-empty-string">
|
||||
<data type="token">
|
||||
<param name="minLength">1</param>
|
||||
</data>
|
||||
</define>
|
||||
</grammar>
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Stylesheet for extracting Schematron information from a RELAX-NG schema.
|
||||
Based on the stylesheet for extracting Schematron information from W3C XML Schema.
|
||||
Created by Eddie Robertsson 2002/06/01
|
||||
2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl)
|
||||
-->
|
||||
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:rng="http://relaxng.org/ns/structure/1.0">
|
||||
<!-- Set the output to be XML with an XML declaration and use indentation -->
|
||||
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
|
||||
<!-- -->
|
||||
<!-- match schema and call recursive template to extract included schemas -->
|
||||
<!-- -->
|
||||
<xsl:template match="/rng:grammar | /rng:element">
|
||||
<!-- call the schema definition template ... -->
|
||||
<xsl:call-template name="gatherSchema">
|
||||
<!-- ... with current node as the $schemas parameter ... -->
|
||||
<xsl:with-param name="schemas" select="."/>
|
||||
<!-- ... and any includes in the $includes parameter -->
|
||||
<xsl:with-param name="includes" select="document(/rng:grammar/rng:include/@href
|
||||
| //rng:externalRef/@href)"/>
|
||||
</xsl:call-template>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- gather all included schemas into a single parameter variable -->
|
||||
<!-- -->
|
||||
<xsl:template name="gatherSchema">
|
||||
<!-- Recursively collect the start schema plus every document it includes or references. -->
|
||||
<!-- $schemas: node-set gathered so far; $includes: candidate documents loaded in the previous step. -->
|
||||
<!-- Recursion stops when $includes adds no node that is not already in $schemas; the "output" template is then called with the full set. -->
|
||||
<xsl:param name="schemas"/>
|
||||
<xsl:param name="includes"/>
|
||||
<xsl:choose>
|
||||
<!-- "<" must be written as &lt; inside an attribute value for the stylesheet to be well-formed XML -->
|
||||
<xsl:when test="count($schemas) &lt; count($schemas | $includes)">
|
||||
<!-- when $includes includes something new, recurse ... -->
|
||||
<xsl:call-template name="gatherSchema">
|
||||
<!-- ... with current $includes added to the $schemas parameter ... -->
|
||||
<xsl:with-param name="schemas" select="$schemas | $includes"/>
|
||||
<!-- ... and any *new* includes in the $includes parameter -->
|
||||
<xsl:with-param name="includes" select="document($includes/rng:grammar/rng:include/@href
|
||||
| $includes//rng:externalRef/@href)"/>
|
||||
</xsl:call-template>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- we have the complete set of included schemas, so now let's output the embedded schematron -->
|
||||
<xsl:call-template name="output">
|
||||
<xsl:with-param name="schemas" select="$schemas"/>
|
||||
</xsl:call-template>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- output the schematron information -->
|
||||
<!-- -->
|
||||
<xsl:template name="output">
|
||||
<!-- Emit a single sch:schema assembled from the Schematron elements embedded anywhere in $schemas. -->
|
||||
<!-- $schemas: node-set of all gathered schema nodes/documents. -->
|
||||
<xsl:param name="schemas"/>
|
||||
<!-- -->
|
||||
<sch:schema>
|
||||
<!-- get header-type elements - eg title and especially ns -->
|
||||
<!-- title (just one): the parentheses make [1] apply to the whole node-set; without them, every sch:title that is the first among its own siblings would be copied -->
|
||||
<xsl:copy-of select="($schemas//sch:title)[1]"/>
|
||||
<!-- get remaining schematron schema children -->
|
||||
<!-- get namespace elements, dropping duplicates (only the first sch:ns per @prefix is kept) -->
|
||||
<xsl:for-each select="$schemas//sch:ns">
|
||||
<xsl:if test="generate-id(.) = generate-id($schemas//sch:ns[@prefix = current()/@prefix][1])">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
<xsl:copy-of select="$schemas//sch:phase"/>
|
||||
<xsl:copy-of select="$schemas//sch:pattern"/>
|
||||
<sch:diagnostics>
|
||||
<xsl:copy-of select="$schemas//sch:diagnostics/*"/>
|
||||
</sch:diagnostics>
|
||||
</sch:schema>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
</xsl:transform>
|
||||
|
|
@ -0,0 +1,77 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
based on an original transform by Eddie Robertsson
|
||||
2001/04/21 fn: added support for included schemas
|
||||
2001/06/27 er: changed XML Schema prefix from xsd: to xs: and changed to the Rec namespace
|
||||
2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl)
|
||||
-->
|
||||
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
||||
<!-- Set the output to be XML with an XML declaration and use indentation -->
|
||||
<xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
|
||||
<!-- -->
|
||||
<!-- match schema and call recursive template to extract included schemas -->
|
||||
<!-- -->
|
||||
<xsl:template match="xs:schema">
|
||||
<!-- call the schema definition template ... -->
|
||||
<xsl:call-template name="gatherSchema">
|
||||
<!-- ... with current root as the $schemas parameter ... -->
|
||||
<xsl:with-param name="schemas" select="/"/>
|
||||
<!-- ... and any includes in the $includes parameter -->
|
||||
<xsl:with-param name="includes"
|
||||
select="document(/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
|
||||
</xsl:call-template>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- gather all included schemas into a single parameter variable -->
|
||||
<!-- -->
|
||||
<xsl:template name="gatherSchema">
|
||||
<!-- Recursively collect the start schema plus every document pulled in via xs:include, xs:import or xs:redefine. -->
|
||||
<!-- $schemas: node-set gathered so far; $includes: candidate documents loaded in the previous step. -->
|
||||
<!-- Recursion stops when $includes adds no node that is not already in $schemas; the "output" template is then called with the full set. -->
|
||||
<xsl:param name="schemas"/>
|
||||
<xsl:param name="includes"/>
|
||||
<xsl:choose>
|
||||
<!-- "<" must be written as &lt; inside an attribute value for the stylesheet to be well-formed XML -->
|
||||
<xsl:when test="count($schemas) &lt; count($schemas | $includes)">
|
||||
<!-- when $includes includes something new, recurse ... -->
|
||||
<xsl:call-template name="gatherSchema">
|
||||
<!-- ... with current $includes added to the $schemas parameter ... -->
|
||||
<xsl:with-param name="schemas" select="$schemas | $includes"/>
|
||||
<!-- ... and any *new* includes in the $includes parameter -->
|
||||
<xsl:with-param name="includes"
|
||||
select="document($includes/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
|
||||
</xsl:call-template>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- we have the complete set of included schemas,
|
||||
so now let's output the embedded schematron -->
|
||||
<xsl:call-template name="output">
|
||||
<xsl:with-param name="schemas" select="$schemas"/>
|
||||
</xsl:call-template>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
<!-- output the schematron information -->
|
||||
<!-- -->
|
||||
<xsl:template name="output">
|
||||
<!-- Emit a single sch:schema assembled from the Schematron elements found under xs:appinfo anywhere in $schemas. -->
|
||||
<!-- $schemas: node-set of all gathered schema documents. -->
|
||||
<xsl:param name="schemas"/>
|
||||
<!-- -->
|
||||
<sch:schema>
|
||||
<!-- get header-type elements - eg title and especially ns -->
|
||||
<!-- title (just one): the parentheses make [1] apply to the whole node-set; without them, the first sch:title of every xs:appinfo would be copied -->
|
||||
<xsl:copy-of select="($schemas//xs:appinfo/sch:title)[1]"/>
|
||||
<!-- get remaining schematron schema children -->
|
||||
<!-- get namespace elements, dropping duplicates (only the first sch:ns per @prefix is kept) -->
|
||||
<xsl:for-each select="$schemas//xs:appinfo/sch:ns">
|
||||
<xsl:if test="generate-id(.) =
|
||||
generate-id($schemas//xs:appinfo/sch:ns[@prefix = current()/@prefix][1])">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:if>
|
||||
</xsl:for-each>
|
||||
<xsl:copy-of select="$schemas//xs:appinfo/sch:phase"/>
|
||||
<xsl:copy-of select="$schemas//xs:appinfo/sch:pattern"/>
|
||||
<sch:diagnostics>
|
||||
<xsl:copy-of select="$schemas//xs:appinfo/sch:diagnostics/*"/>
|
||||
</sch:diagnostics>
|
||||
</sch:schema>
|
||||
</xsl:template>
|
||||
<!-- -->
|
||||
</xsl:transform>
|
||||
|
|
@ -0,0 +1,296 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?><?xar XSLT?>
|
||||
|
||||
<!--
|
||||
OVERVIEW - iso_abstract_expand.xsl
|
||||
|
||||
This is a preprocessor for ISO Schematron, which implements abstract patterns.
|
||||
It also
|
||||
* extracts a particular schema using an ID, where there are multiple
|
||||
schemas, such as when they are embedded in the same NVDL script
|
||||
* experimentally, allows parameter recognition and substitution inside
|
||||
text as well as @context, @test, & @select.
|
||||
|
||||
|
||||
This should be used after iso-dsdl-include.xsl and before the skeleton or
|
||||
meta-stylesheet (e.g. iso-svrl.xsl) . It only requires XSLT 1.
|
||||
|
||||
Each kind of inclusion can be turned off (or on) on the command line.
|
||||
|
||||
-->
|
||||
<!--
|
||||
VERSION INFORMATION
|
||||
2008-09-18 RJ
|
||||
* move out param test from iso:schema template to work with XSLT 1. (Noah Fontes)
|
||||
|
||||
2008-07-29 RJ
|
||||
* Create. Pull out as distinct XSL in its own namespace from old iso_pre_pro.xsl
|
||||
* Put everything in private namespace
|
||||
* Rewrite replace_substring named template so that copyright is clear
|
||||
|
||||
2008-07-24 RJ
|
||||
* correct abstract patterns to use the correct names: param/@name and
|
||||
param/@value
|
||||
|
||||
2007-01-12 RJ
|
||||
* Use ISO namespace
|
||||
* Use pattern/@id not pattern/@name
|
||||
* Add Oliver Becker's suggestions from old Schematron-love-in list for <copy>
|
||||
* Add XT -ism?
|
||||
2003 RJ
|
||||
* Original written for old namespace
|
||||
* http://www.topologi.com/resources/iso-pre-pro.xsl
|
||||
-->
|
||||
<!--
|
||||
LEGAL INFORMATION
|
||||
|
||||
Copyright (c) 2000-2008 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from
|
||||
the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
-->
|
||||
<xslt:stylesheet version="1.0" xmlns:xslt="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:iso="http://purl.oclc.org/dsdl/schematron"
|
||||
xmlns:nvdl="http://purl.oclc.org/dsdl/nvdl"
|
||||
|
||||
|
||||
xmlns:iae="http://www.schematron.com/namespace/iae"
|
||||
|
||||
>
|
||||
|
||||
<xslt:param name="schema-id"></xslt:param>
|
||||
|
||||
|
||||
<!-- Driver for the mode -->
|
||||
<xsl:template match="/">
|
||||
<xsl:apply-templates select="." mode="iae:go" />
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Normal processing rules -->
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Output only the selected schema -->
|
||||
<xslt:template match="iso:schema" mode="iae:go" >
|
||||
<!-- Copy an iso:schema element (its attributes, then its children processed in mode iae:go) only when no $schema-id was supplied or the element's @id equals $schema-id. -->
|
||||
<!-- The mode is required: the driver template dispatches all processing in mode iae:go, so a mode-less rule is never selected and the catch-all copy rule would emit every schema regardless of $schema-id. -->
|
||||
<xsl:if test="string-length($schema-id) =0 or @id= $schema-id ">
|
||||
<xslt:copy>
|
||||
<xslt:copy-of select="@*" />
|
||||
<xslt:apply-templates mode="iae:go" />
|
||||
</xslt:copy>
|
||||
</xsl:if>
|
||||
</xslt:template>
|
||||
|
||||
|
||||
<!-- Strip out any foreign elements above the Schematron schema .
|
||||
-->
|
||||
<xslt:template match="*[not(ancestor-or-self::iso:*)]" mode="iae:go" >
|
||||
<xslt:apply-templates mode="iae:go" />
|
||||
</xslt:template>
|
||||
|
||||
|
||||
<!-- ================================================================================== -->
|
||||
<!-- Handle Schematron abstract pattern preprocessing -->
|
||||
<!-- abstract-to-real calls
|
||||
do-pattern calls
|
||||
macro-expand calls
|
||||
multi-macro-expand
|
||||
replace-substring -->
|
||||
<!-- ================================================================================== -->
|
||||
|
||||
<!--
|
||||
Abstract patterns allow you to say, for example
|
||||
|
||||
<pattern name="htmlTable" is-a="table">
|
||||
<param name="row" value="html:tr"/>
|
||||
<param name="cell" value="html:td" />
|
||||
<param name="table" value="html:table" />
|
||||
</pattern>
|
||||
|
||||
For a good introduction, see Uche Ogbuji's article for IBM DeveloperWorks
|
||||
"Discover the flexibility of Schematron abstract patterns"
|
||||
http://www-128.ibm.com/developerworks/xml/library/x-stron.html
|
||||
However, note that ISO Schematron uses @name and @value attributes on
|
||||
the iso:param element, and @id not @name on the pattern element.
|
||||
|
||||
-->
|
||||
|
||||
<!-- Suppress declarations of abstract patterns -->
|
||||
<xslt:template match="iso:pattern[@abstract='true']" mode="iae:go" >
|
||||
<xslt:comment>Suppressed abstract pattern <xslt:value-of select="@id"/> was here</xslt:comment>
|
||||
</xslt:template>
|
||||
|
||||
|
||||
<!-- Suppress uses of abstract patterns -->
|
||||
<xslt:template match="iso:pattern[@is-a]" mode="iae:go" >
|
||||
|
||||
<xslt:comment>Start pattern based on abstract <xslt:value-of select="@is-a"/></xslt:comment>
|
||||
|
||||
<xslt:call-template name="iae:abstract-to-real" >
|
||||
<xslt:with-param name="caller" select="@id" />
|
||||
<xslt:with-param name="is-a" select="@is-a" />
|
||||
</xslt:call-template>
|
||||
|
||||
</xslt:template>
|
||||
|
||||
|
||||
|
||||
<!-- output everything else unchanged -->
|
||||
<xslt:template match="*" priority="-1" mode="iae:go" >
|
||||
<xslt:copy>
|
||||
<xslt:copy-of select="@*" />
|
||||
<xslt:apply-templates mode="iae:go"/>
|
||||
</xslt:copy>
|
||||
</xslt:template>
|
||||
|
||||
<!-- Templates for macro expansion of abstract patterns -->
|
||||
<!-- Sets up the initial conditions for the recursive call -->
|
||||
<xslt:template name="iae:macro-expand">
|
||||
<xslt:param name="caller"/>
|
||||
<xslt:param name="text" />
|
||||
<xslt:call-template name="iae:multi-macro-expand">
|
||||
<xslt:with-param name="caller" select="$caller"/>
|
||||
<xslt:with-param name="text" select="$text"/>
|
||||
<xslt:with-param name="paramNumber" select="1"/>
|
||||
</xslt:call-template>
|
||||
|
||||
</xslt:template>
|
||||
|
||||
<!-- Template to replace the current parameter and then
|
||||
recurse to replace subsequent parameters. -->
|
||||
|
||||
<xslt:template name="iae:multi-macro-expand">
|
||||
<xslt:param name="caller"/>
|
||||
<xslt:param name="text" />
|
||||
<xslt:param name="paramNumber" />
|
||||
|
||||
|
||||
<xslt:choose>
|
||||
<xslt:when test="//iso:pattern[@id=$caller]/iso:param[ $paramNumber]">
|
||||
|
||||
<xslt:call-template name="iae:multi-macro-expand">
|
||||
<xslt:with-param name="caller" select="$caller"/>
|
||||
<xslt:with-param name="paramNumber" select="$paramNumber + 1"/>
|
||||
<xslt:with-param name="text" >
|
||||
<xslt:call-template name="iae:replace-substring">
|
||||
<xslt:with-param name="original" select="$text"/>
|
||||
<xslt:with-param name="substring"
|
||||
select="concat('$', //iso:pattern[@id=$caller]/iso:param[ $paramNumber ]/@name)"/>
|
||||
<xslt:with-param name="replacement"
|
||||
select="//iso:pattern[@id=$caller]/iso:param[ $paramNumber ]/@value"/>
|
||||
</xslt:call-template>
|
||||
</xslt:with-param>
|
||||
</xslt:call-template>
|
||||
</xslt:when>
|
||||
<xslt:otherwise><xslt:value-of select="$text" /></xslt:otherwise>
|
||||
|
||||
</xslt:choose>
|
||||
</xslt:template>
|
||||
|
||||
|
||||
<!-- generate the real pattern from an abstract pattern + parameters-->
|
||||
<xslt:template name="iae:abstract-to-real" >
|
||||
<xslt:param name="caller"/>
|
||||
<xslt:param name="is-a" />
|
||||
<xslt:for-each select="//iso:pattern[@id= $is-a]">
|
||||
<xslt:copy>
|
||||
|
||||
<xslt:choose>
|
||||
<xslt:when test=" string-length( $caller ) = 0">
|
||||
<xslt:attribute name="id"><xslt:value-of select="concat( generate-id(.) , $is-a)" /></xslt:attribute>
|
||||
</xslt:when>
|
||||
<xslt:otherwise>
|
||||
<xslt:attribute name="id"><xslt:value-of select="$caller" /></xslt:attribute>
|
||||
</xslt:otherwise>
|
||||
</xslt:choose>
|
||||
|
||||
<xslt:apply-templates select="*|text()" mode="iae:do-pattern" >
|
||||
<xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
|
||||
</xslt:apply-templates>
|
||||
|
||||
</xslt:copy>
|
||||
</xslt:for-each>
|
||||
</xslt:template>
|
||||
|
||||
|
||||
<!-- Generate a non-abstract pattern -->
|
||||
<xslt:template mode="iae:do-pattern" match="*">
|
||||
<xslt:param name="caller"/>
|
||||
<xslt:copy>
|
||||
<xslt:for-each select="@*[name()='test' or name()='context' or name()='select']">
|
||||
<xslt:attribute name="{name()}">
|
||||
<xslt:call-template name="iae:macro-expand">
|
||||
<xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
|
||||
<xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
|
||||
</xslt:call-template>
|
||||
</xslt:attribute>
|
||||
</xslt:for-each>
|
||||
<xslt:copy-of select="@*[name()!='test'][name()!='context'][name()!='select']" />
|
||||
<xsl:for-each select="node()">
|
||||
<xsl:choose>
|
||||
<!-- Experiment: replace macros in text as well, to allow parameterized assertions
|
||||
and so on, without having to have spurious <iso:value-of> calls and multiple
|
||||
delimiting -->
|
||||
<xsl:when test="self::text()">
|
||||
<xslt:call-template name="iae:macro-expand">
|
||||
<xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param>
|
||||
<xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
|
||||
</xslt:call-template>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xslt:apply-templates select="." mode="iae:do-pattern">
|
||||
<xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param>
|
||||
</xslt:apply-templates>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:for-each>
|
||||
</xslt:copy>
|
||||
</xslt:template>
|
||||
|
||||
<!-- UTILITIES -->
|
||||
<!-- Simple version of replace-substring function -->
|
||||
<xslt:template name="iae:replace-substring">
|
||||
<xslt:param name="original" />
|
||||
<xslt:param name="substring" />
|
||||
<xslt:param name="replacement" select="''"/>
|
||||
|
||||
<xsl:choose>
|
||||
<xsl:when test="not($original)" />
|
||||
<xsl:when test="not(string($substring))">
|
||||
<xsl:value-of select="$original" />
|
||||
</xsl:when>
|
||||
<xsl:when test="contains($original, $substring)">
|
||||
<xsl:variable name="before" select="substring-before($original, $substring)" />
|
||||
<xsl:variable name="after" select="substring-after($original, $substring)" />
|
||||
|
||||
<xsl:value-of select="$before" />
|
||||
<xsl:value-of select="$replacement" />
|
||||
<!-- recursion -->
|
||||
<xsl:call-template name="iae:replace-substring">
|
||||
<xsl:with-param name="original" select="$after" />
|
||||
<xsl:with-param name="substring" select="$substring" />
|
||||
<xsl:with-param name="replacement" select="$replacement" />
|
||||
</xsl:call-template>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- no substitution -->
|
||||
<xsl:value-of select="$original" />
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xslt:template>
|
||||
|
||||
</xslt:stylesheet>
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" ?><?xar XSLT?>
|
||||
<!-- Implementation for the Schematron XML Schema Language.
|
||||
http://www.ascc.net/xml/resource/schematron/schematron.html
|
||||
|
||||
Copyright (c) 2000,2001 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from
|
||||
the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
-->
|
||||
|
||||
<!-- Schematron message -->
|
||||
|
||||
<xsl:stylesheet
|
||||
version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias">
|
||||
|
||||
<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/>
|
||||
|
||||
<xsl:template name="process-prolog">
|
||||
<axsl:output method="text" />
|
||||
</xsl:template>
|
||||
|
||||
<!-- use default rule for process-root: copy contents / ignore title -->
|
||||
<!-- use default rule for process-pattern: ignore name and see -->
|
||||
<!-- use default rule for process-name: output name -->
|
||||
<!-- use default rule for process-assert and process-report:
|
||||
call process-message -->
|
||||
|
||||
<xsl:template name="process-message">
|
||||
<xsl:param name="pattern" />
|
||||
<xsl:param name="role" />
|
||||
<axsl:message>
|
||||
<xsl:apply-templates mode="text"
|
||||
/> (<xsl:value-of select="$pattern" />
|
||||
<xsl:if test="$role"> / <xsl:value-of select="$role" />
|
||||
</xsl:if>)</axsl:message>
|
||||
</xsl:template>
|
||||
|
||||
</xsl:stylesheet>
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,588 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
ISO_SVRL.xsl
|
||||
|
||||
Implementation of Schematron Validation Report Language from ISO Schematron
|
||||
ISO/IEC 19757 Document Schema Definition Languages (DSDL)
|
||||
Part 3: Rule-based validation Schematron
|
||||
Annex D: Schematron Validation Report Language
|
||||
|
||||
This ISO Standard is available free as a Publicly Available Specification in PDF from ISO.
|
||||
Also see www.schematron.com for drafts and other information.
|
||||
|
||||
This implementation of SVRL is designed to run with the "Skeleton" implementation
|
||||
of Schematron which Oliver Becker devised. The skeleton code provides a
|
||||
Schematron implementation but with named templates for handling all output;
|
||||
the skeleton provides basic templates for output using this API, but client
|
||||
validators can be written to import the skeleton and override the default output
|
||||
templates as required. (In order to understand this, you must understand that
|
||||
a named template such as "process-assert" in this XSLT stylesheet overrides and
|
||||
replaces any template with the same name in the imported skeleton XSLT file.)
|
||||
|
||||
The other important thing to understand in this code is that there are different
|
||||
versions of the Schematron skeleton. These track the development of Schematron through
|
||||
Schematron 1.5, Schematron 1.6 and now ISO Schematron. Only one skeleton must be
|
||||
imported. The code has templates for the different skeletons commented out for
|
||||
convenience. ISO Schematron has a different namespace than Schematron 1.5 and 1.6;
|
||||
so the ISO Schematron skeleton has been written itself with an optional import
|
||||
statement to in turn import the Schematron 1.6 skeleton. This will allow you to
|
||||
validate with schemas from either namespace.
|
||||
|
||||
|
||||
History:
|
||||
2009-03-18
|
||||
* Fix attribute with space "see " which generates wrong name in some processors
|
||||
2008-08-11
|
||||
* RJ Fix attribute/@select which saxon allows in XSLT 1
|
||||
2008-08-07
|
||||
* RJ Add output-encoding attribute to specify final encoding to use
|
||||
* Alter allow-foreign functionality so that Schematron span, emph and dir elements make
|
||||
it to the output, for better formatting and because span can be used to mark up
|
||||
semantically interesting information embedded in diagnostics, which reduces the
|
||||
need to extend SVRL itself
|
||||
* Diagnostic-reference had an invalid attribute @id that duplicated @diagnostic: removed
|
||||
2008-08-06
|
||||
* RJ Fix invalid output: svrl:diagnostic-reference is not contained in an svrl:text
|
||||
* Output comment to SVRL file giving filename if available (from command-line parameter)
|
||||
2008-08-04
|
||||
* RJ move sch: prefix to schold: prefix to prevent confusion (we want people to
|
||||
be able to switch from old namespace to new namespace without changing the
|
||||
sch: prefix, so it is better to keep that prefix completely out of the XSLT)
|
||||
* Extra signature fixes (PH)
|
||||
2008-08-03
|
||||
* Repair missing class parameter on process-p
|
||||
2008-07-31
|
||||
* Update skeleton names
|
||||
2007-04-03
|
||||
* Add option generate-fired-rule (RG)
|
||||
2007-02-07
|
||||
* Prefer true|false for parameters. But allow yes|no on some older ones for compatibility
|
||||
* DP Diagnostics output to svrl:text. Diagnosis put out after assertion text.
|
||||
* Removed non-SVRL elements and attributes: better handled as an extra layer that invokes this one
|
||||
* Add more formal parameters
|
||||
* Correct confusion between $schemaVersion and $queryBinding
|
||||
* Indent
|
||||
* Validate against RNC schemas for XSLT 1 and 2 (with regex tests removed)
|
||||
* Validate output with UniversalTest.sch against RNC schema for ISO SVRL
|
||||
|
||||
2007-02-01
|
||||
* DP. Update formal parameters of overriding named templates to handle more attributes.
|
||||
* DP. Refactor handling of rich and linkable parameters to a named template.
|
||||
|
||||
2007-01-22
|
||||
* DP change svrl:ns to svrl:ns-in-attribute-value
|
||||
* Change default when no queryBinding from "unknown" to "xslt"
|
||||
|
||||
2007-01-18:
|
||||
* Improve documentation
|
||||
* KH Add command-line options to generate paths or not
|
||||
* Use axsl:attribute rather than xsl:attribute to shut XSLT2 up
|
||||
* Add extra command-line options to pass to the iso_schematron_skeleton
|
||||
|
||||
2006-12-01: iso_svrl.xsl Rick Jelliffe,
|
||||
* update namespace,
|
||||
* update phase handling,
|
||||
* add flag param to process-assert and process-report & @ flag on output
|
||||
|
||||
2001: Conformance1-5.xsl Rick Jelliffe,
|
||||
* Created, using the skeleton code contributed by Oliver Becker
|
||||
-->
|
||||
<!--
|
||||
Derived from Conformance1-5.xsl.
|
||||
|
||||
Copyright (c) 2001, 2006 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
|
||||
|
||||
This software is provided 'as-is', without any express or implied warranty.
|
||||
In no event will the authors be held liable for any damages arising from
|
||||
the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it freely,
|
||||
subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not claim
|
||||
that you wrote the original software. If you use this software in a product,
|
||||
an acknowledgment in the product documentation would be appreciated but is
|
||||
not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
-->
|
||||
|
||||
<!-- Ideas nabbed from schematrons by Francis N., Miloslav N. and David C. -->
|
||||
|
||||
<!-- The command-line parameters are:
|
||||
phase NMTOKEN | "#ALL" (default) Select the phase for validation
|
||||
allow-foreign "true" | "false" (default) Pass non-Schematron elements and rich markup to the generated stylesheet
|
||||
diagnose= true | false|yes|no Add the diagnostics to the assertion test in reports (yes|no are obsolete)
|
||||
generate-paths=true|false|yes|no generate the @location attribute with XPaths (yes|no are obsolete)
|
||||
sch.exslt.imports semi-colon delimited string of filenames for some EXSLT implementations
|
||||
optimize "visit-no-attributes" Use only when the schema has no attributes as the context nodes
|
||||
generate-fired-rule "true"(default) | "false" Generate fired-rule elements
|
||||
|
||||
-->
|
||||
|
||||
<xsl:stylesheet
|
||||
version="1.0"
|
||||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
||||
xmlns:xs="http://www.w3.org/2001/XMLSchema"
|
||||
xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias"
|
||||
xmlns:schold="http://www.ascc.net/xml/schematron"
|
||||
xmlns:iso="http://purl.oclc.org/dsdl/schematron"
|
||||
xmlns:svrl="http://purl.oclc.org/dsdl/svrl"
|
||||
>
|
||||
|
||||
<!-- Select the import statement and adjust the path as
|
||||
necessary for your system.
|
||||
If not XSLT2 then also remove svrl:active-pattern/@document="{document-uri()}" from process-pattern()
|
||||
-->
|
||||
<!--
|
||||
<xsl:import href="iso_schematron_skeleton_for_saxon.xsl"/>
|
||||
-->
|
||||
|
||||
<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/>
|
||||
<!--
|
||||
<xsl:import href="iso_schematron_skeleton.xsl"/>
|
||||
<xsl:import href="skeleton1-5.xsl"/>
|
||||
<xsl:import href="skeleton1-6.xsl"/>
|
||||
-->
|
||||
|
||||
<xsl:param name="diagnose" >true</xsl:param>
|
||||
<xsl:param name="phase" >
|
||||
<xsl:choose>
|
||||
<!-- Handle Schematron 1.5 and 1.6 phases -->
|
||||
<xsl:when test="//schold:schema/@defaultPhase">
|
||||
<xsl:value-of select="//schold:schema/@defaultPhase"/>
|
||||
</xsl:when>
|
||||
<!-- Handle ISO Schematron phases -->
|
||||
<xsl:when test="//iso:schema/@defaultPhase">
|
||||
<xsl:value-of select="//iso:schema/@defaultPhase"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>#ALL</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:param>
|
||||
<xsl:param name="allow-foreign" >false</xsl:param>
|
||||
<xsl:param name="generate-paths" >true</xsl:param>
|
||||
<xsl:param name="generate-fired-rule" >true</xsl:param>
|
||||
<xsl:param name="optimize"/>
|
||||
|
||||
<xsl:param name="output-encoding" ></xsl:param>
|
||||
|
||||
<!-- e.g. saxon file.xml file.xsl "sch.exslt.imports=.../string.xsl;.../math.xsl" -->
|
||||
<xsl:param name="sch.exslt.imports" />
|
||||
|
||||
|
||||
|
||||
<!-- Experimental: if this file is called, then we must be generating SVRL -->
|
||||
<xsl:variable name="svrlTest" select="true()" />
|
||||
|
||||
|
||||
|
||||
<!-- ================================================================ -->
|
||||
|
||||
<xsl:template name="process-prolog">
|
||||
<axsl:output method="xml" omit-xml-declaration="no" standalone="yes"
|
||||
indent="yes">
|
||||
<xsl:if test=" string-length($output-encoding) > 0">
|
||||
<xsl:attribute name="encoding"><xsl:value-of select=" $output-encoding" /></xsl:attribute>
|
||||
</xsl:if>
|
||||
</axsl:output>
|
||||
|
||||
</xsl:template>
|
||||
|
||||
<!-- Overrides skeleton.xsl -->
|
||||
<xsl:template name="process-root">
|
||||
<xsl:param name="title"/>
|
||||
<xsl:param name="contents" />
|
||||
<xsl:param name="queryBinding" >xslt1</xsl:param>
|
||||
<xsl:param name="schemaVersion" />
|
||||
<xsl:param name="id" />
|
||||
<xsl:param name="version"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
|
||||
<svrl:schematron-output title="{$title}" schemaVersion="{$schemaVersion}" >
|
||||
<xsl:if test=" string-length( normalize-space( $phase )) > 0 and
|
||||
not( normalize-space( $phase ) = '#ALL') ">
|
||||
<axsl:attribute name="phase">
|
||||
<xsl:value-of select=" $phase " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
</xsl:if>
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
|
||||
<xsl:call-template name='richParms'>
|
||||
<xsl:with-param name="fpi" select="$fpi" />
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
|
||||
<axsl:comment><axsl:value-of select="$archiveDirParameter"/>  
|
||||
<axsl:value-of select="$archiveNameParameter"/>  
|
||||
<axsl:value-of select="$fileNameParameter"/>  
|
||||
<axsl:value-of select="$fileDirParameter"/></axsl:comment>
|
||||
|
||||
|
||||
<xsl:apply-templates mode="do-schema-p" />
|
||||
<xsl:copy-of select="$contents" />
|
||||
</svrl:schematron-output>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<xsl:template name="process-assert">
|
||||
<xsl:param name="test"/>
|
||||
<xsl:param name="diagnostics" />
|
||||
<xsl:param name="id" />
|
||||
<xsl:param name="flag" />
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:failed-assert test="{$test}" >
|
||||
<xsl:if test="string-length( $id ) > 0">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $flag ) > 0">
|
||||
<axsl:attribute name="flag">
|
||||
<xsl:value-of select=" $flag " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name='linkableParms'>
|
||||
<xsl:with-param name="role" select="$role" />
|
||||
<xsl:with-param name="subject" select="$subject"/>
|
||||
</xsl:call-template>
|
||||
<xsl:if test=" $generate-paths = 'true' or $generate-paths= 'yes' ">
|
||||
<!-- true/false is the new way -->
|
||||
<axsl:attribute name="location">
|
||||
<axsl:apply-templates select="." mode="schematron-get-full-path"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text" />
|
||||
|
||||
</svrl:text>
|
||||
<xsl:if test="$diagnose = 'yes' or $diagnose= 'true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<xsl:call-template name="diagnosticsSplit">
|
||||
<xsl:with-param name="str" select="$diagnostics"/>
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
</svrl:failed-assert>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="process-report">
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="test"/>
|
||||
<xsl:param name="diagnostics"/>
|
||||
<xsl:param name="flag" />
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:successful-report test="{$test}" >
|
||||
<xsl:if test=" string-length( $id ) > 0">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $flag ) > 0">
|
||||
<axsl:attribute name="flag">
|
||||
<xsl:value-of select=" $flag " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:call-template name='linkableParms'>
|
||||
<xsl:with-param name="role" select="$role" />
|
||||
<xsl:with-param name="subject" select="$subject"/>
|
||||
</xsl:call-template>
|
||||
<xsl:if test=" $generate-paths = 'yes' or $generate-paths = 'true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<axsl:attribute name="location">
|
||||
<axsl:apply-templates select="." mode="schematron-get-full-path"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text" />
|
||||
|
||||
</svrl:text>
|
||||
<xsl:if test="$diagnose = 'yes' or $diagnose='true' ">
|
||||
<!-- true/false is the new way -->
|
||||
<xsl:call-template name="diagnosticsSplit">
|
||||
<xsl:with-param name="str" select="$diagnostics"/>
|
||||
</xsl:call-template>
|
||||
</xsl:if>
|
||||
</svrl:successful-report>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<xsl:template name="process-dir" >
|
||||
<xsl:param name="value" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="process-diagnostic">
|
||||
<xsl:param name="id"/>
|
||||
<!-- Rich parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:diagnostic-reference diagnostic="{$id}" >
|
||||
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<xsl:text>
|
||||
</xsl:text>
|
||||
|
||||
<xsl:apply-templates mode="text"/>
|
||||
|
||||
</svrl:diagnostic-reference>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<xsl:template name="process-emph" >
|
||||
<xsl:param name="class" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="process-rule">
<!-- Overrides the skeleton's process-rule: when the generate-fired-rule
     parameter is 'true', writes an svrl:fired-rule element for the rule
     context into the generated validator; otherwise writes nothing.
     Parameters: id, context, role and the "rich" parameters are used;
     flag and subject are accepted but not used in this body. -->
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="context"/>
|
||||
<xsl:param name="flag"/>
|
||||
<!-- "Linkable" parameters -->
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<xsl:if test=" $generate-fired-rule = 'true'">
|
||||
<svrl:fired-rule context="{$context}" >
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:call-template name="richParms">
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
<!-- id and role are written as axsl:attribute (like process-assert and
     process-pattern do). richParms may already have written axsl:attribute
     child elements into svrl:fired-rule, and a compile-time xsl:attribute
     after child nodes is an error that processors may resolve by dropping
     the attribute. -->
<xsl:if test=" string( $id )">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string-length( $role ) > 0">
|
||||
<axsl:attribute name="role">
|
||||
<xsl:value-of select=" $role " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
</svrl:fired-rule>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<!-- Writes one svrl:ns-prefix-in-attribute-values element whose uri and
     prefix attributes carry the values of the uri and prefix parameters. -->
<xsl:template name="process-ns">
|
||||
<xsl:param name="prefix"/>
|
||||
<xsl:param name="uri"/>
|
||||
<svrl:ns-prefix-in-attribute-values uri="{$uri}" prefix="{$prefix}" />
|
||||
</xsl:template>
|
||||
|
||||
<!-- Wraps the text-mode rendering of the current node's children in an
     svrl:text element. The icon, class, id and lang parameters are
     declared but not used in this body. -->
<xsl:template name="process-p">
|
||||
<xsl:param name="icon"/>
|
||||
<xsl:param name="class"/>
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="lang"/>
|
||||
|
||||
<svrl:text>
|
||||
<xsl:apply-templates mode="text"/>
|
||||
</svrl:text>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template name="process-pattern">
|
||||
<xsl:param name="name"/>
|
||||
<xsl:param name="id"/>
|
||||
<xsl:param name="is-a"/>
|
||||
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<svrl:active-pattern >
|
||||
<xsl:if test=" string( $id )">
|
||||
<axsl:attribute name="id">
|
||||
<xsl:value-of select=" $id " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test=" string( $name )">
|
||||
<axsl:attribute name="name">
|
||||
<xsl:value-of select=" $name " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:call-template name='richParms'>
|
||||
<xsl:with-param name="fpi" select="$fpi"/>
|
||||
<xsl:with-param name="icon" select="$icon"/>
|
||||
<xsl:with-param name="lang" select="$lang"/>
|
||||
<xsl:with-param name="see" select="$see" />
|
||||
<xsl:with-param name="space" select="$space" />
|
||||
</xsl:call-template>
|
||||
|
||||
<!-- ?? report that this screws up iso:title processing -->
|
||||
<xsl:apply-templates mode="do-pattern-p"/>
|
||||
<!-- ?? Seems that this apply-templates is never triggered DP -->
|
||||
<axsl:apply-templates />
|
||||
</svrl:active-pattern>
|
||||
</xsl:template>
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<!-- Declares the pattern and role parameters but has an empty body, so
     calling this template produces no output.
     NOTE(review): presumably intentional, since this stylesheet puts
     assertion text in svrl:text instead; confirm against the skeleton. -->
<xsl:template name="process-message" >
|
||||
<xsl:param name="pattern"/>
|
||||
<xsl:param name="role"/>
|
||||
</xsl:template>
|
||||
|
||||
|
||||
<!-- Overrides skeleton -->
|
||||
<xsl:template name="process-span" >
|
||||
<xsl:param name="class" />
|
||||
<xsl:choose>
|
||||
<xsl:when test=" $allow-foreign = 'true'">
|
||||
<xsl:copy-of select="."/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<!-- We generate too much whitespace rather than risking concatenation -->
|
||||
<axsl:text> </axsl:text>
|
||||
<xsl:apply-templates mode="inline-text"/>
|
||||
<axsl:text> </axsl:text>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:template>
|
||||
|
||||
<!-- =========================================================================== -->
|
||||
<!-- processing rich parameters. -->
|
||||
<xsl:template name='richParms'>
|
||||
<!-- "Rich" parameters -->
|
||||
<xsl:param name="fpi" />
|
||||
<xsl:param name="icon" />
|
||||
<xsl:param name="lang" />
|
||||
<xsl:param name="see" />
|
||||
<xsl:param name="space" />
|
||||
<!-- Process rich attributes. -->
|
||||
<xsl:if test=" $allow-foreign = 'true'">
|
||||
<xsl:if test="string($fpi)">
|
||||
<axsl:attribute name="fpi">
|
||||
<xsl:value-of select="$fpi"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($icon)">
|
||||
<axsl:attribute name="icon">
|
||||
<xsl:value-of select="$icon"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($see)">
|
||||
<axsl:attribute name="see">
|
||||
<xsl:value-of select="$see"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($space)">
|
||||
<axsl:attribute name="xml:space">
|
||||
<xsl:value-of select="$space"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:if test="string($lang)">
|
||||
<axsl:attribute name="xml:lang">
|
||||
<xsl:value-of select="$lang"/>
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
</xsl:template>
|
||||
|
||||
<!-- processing linkable parameters. -->
|
||||
<xsl:template name='linkableParms'>
|
||||
<xsl:param name="role"/>
|
||||
<xsl:param name="subject"/>
|
||||
|
||||
<!-- ISO SVRL has a role attribute to match the Schematron role attribute -->
|
||||
<xsl:if test=" string($role )">
|
||||
<axsl:attribute name="role">
|
||||
<xsl:value-of select=" $role " />
|
||||
</axsl:attribute>
|
||||
</xsl:if>
|
||||
<!-- ISO SVRL does not have a subject attribute to match the Schematron subject attribute.
|
||||
Instead, the Schematron subject attribute is folded into the location attribute -->
|
||||
</xsl:template>
|
||||
|
||||
|
||||
</xsl:stylesheet>
|
||||
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
ISO SCHEMATRON 2009
|
||||
|
||||
XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
|
||||
|
||||
2009-03-18
|
||||
|
||||
Two distributions are available. One is for XSLT1 engines.
|
||||
The other is for XSLT2 engines, such as SAXON 9.
|
||||
|
||||
|
||||
This version of Schematron splits the process into a pipeline of several different XSLT stages.
|
||||
|
||||
1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
|
||||
This is a macro processor to assemble the schema from various parts.
|
||||
If your schema is not in separate parts, you can skip this stage.
|
||||
|
||||
2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
|
||||
This is a macro processor to convert abstract patterns to real patterns.
|
||||
If your schema does not use abstract patterns, you can skip this
|
||||
stage.
|
||||
|
||||
3) Third, compile the Schematron schema into an XSLT script.
|
||||
This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
|
||||
(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
|
||||
However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
|
||||
If your schema uses Schematron phases, supply these as command line/invocation parameters
|
||||
to this process.
|
||||
|
||||
4) Fourth, run the script generated by stage 3 against the document being validated.
|
||||
If you are using the SVRL script, then the output of validation will be an XML document.
|
||||
If your schema uses Schematron parameters, supply these as command line/invocation parameters
|
||||
to this process.
|
||||
|
||||
|
||||
The XSLT2 distribution also features several next generation features,
|
||||
such as validating multiple documents. See the source code for details.
|
||||
|
||||
Schematron assertions can be written in any language, of course; the file
|
||||
sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
|
||||
in English, and this can be used as template to localize the skeleton's
|
||||
error messages. Note that typically programming errors in Schematron are XPath
|
||||
errors, which require localized messages from the XSLT engine.
|
||||
|
||||
ANT
|
||||
---
|
||||
To give an example of how to process a document, here is a sample ANT task.
|
||||
|
||||
<target name="schematron-compile-test" >
|
||||
|
||||
<!-- expand inclusions -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
<!-- expand abstract patterns -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
|
||||
|
||||
<!-- compile it -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
|
||||
<!-- validate -->
|
||||
<xslt basedir="test/schematron"
|
||||
style="test.xsl" in="instance.xml" out="instance.svrlt">
|
||||
<classpath>
|
||||
<pathelement location="${lib.dir}/saxon9.jar"/>
|
||||
</classpath>
|
||||
</xslt>
|
||||
</target>
|
||||
219
lib/python3.5/site-packages/lxml/lxml.etree.h
Normal file
219
lib/python3.5/site-packages/lxml/lxml.etree.h
Normal file
|
|
@ -0,0 +1,219 @@
|
|||
/* Generated by Cython 0.23.4 */
|
||||
|
||||
#ifndef __PYX_HAVE__lxml__etree
|
||||
#define __PYX_HAVE__lxml__etree
|
||||
|
||||
struct LxmlDocument;
|
||||
struct LxmlElement;
|
||||
struct LxmlElementTree;
|
||||
struct LxmlElementTagMatcher;
|
||||
struct LxmlElementIterator;
|
||||
struct LxmlElementBase;
|
||||
struct LxmlElementClassLookup;
|
||||
struct LxmlFallbackElementClassLookup;
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":328
|
||||
*
|
||||
* # type of a function that steps from node to node
|
||||
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
*
|
||||
*/
|
||||
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":344
|
||||
* @cython.final
|
||||
* @cython.freelist(8)
|
||||
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
|
||||
* u"""Internal base class to reference a libxml document.
|
||||
*
|
||||
*/
|
||||
struct LxmlDocument {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
|
||||
int _ns_counter;
|
||||
PyObject *_prefix_tail;
|
||||
xmlDoc *_c_doc;
|
||||
struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
|
||||
};
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":696
|
||||
*
|
||||
* @cython.no_gc_clear
|
||||
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
|
||||
* u"""Element class.
|
||||
*
|
||||
*/
|
||||
struct LxmlElement {
|
||||
PyObject_HEAD
|
||||
struct LxmlDocument *_doc;
|
||||
xmlNode *_c_node;
|
||||
PyObject *_tag;
|
||||
};
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":1858
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementTree ]:
|
||||
* cdef _Document _doc
|
||||
*/
|
||||
struct LxmlElementTree {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
|
||||
struct LxmlDocument *_doc;
|
||||
struct LxmlElement *_context_node;
|
||||
};
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":2572
|
||||
*
|
||||
*
|
||||
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
||||
* type LxmlElementTagMatcherType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementTagMatcher {
|
||||
PyObject_HEAD
|
||||
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
|
||||
PyObject *_pystrings;
|
||||
int _node_type;
|
||||
char *_href;
|
||||
char *_name;
|
||||
};
|
||||
|
||||
/* "src/lxml/lxml.etree.pyx":2603
|
||||
* self._name = NULL
|
||||
*
|
||||
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
||||
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
||||
* """
|
||||
*/
|
||||
struct LxmlElementIterator {
|
||||
struct LxmlElementTagMatcher __pyx_base;
|
||||
struct LxmlElement *_node;
|
||||
_node_to_node_function _next_element;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":6
|
||||
* # Custom Element classes
|
||||
*
|
||||
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementBase ]:
|
||||
* u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
||||
*/
|
||||
struct LxmlElementBase {
|
||||
struct LxmlElement __pyx_base;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":211
|
||||
* # Element class lookup
|
||||
*
|
||||
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
*/
|
||||
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
||||
|
||||
/* "src/lxml/classlookup.pxi":214
|
||||
*
|
||||
* # class to store element class lookup functions
|
||||
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
||||
* object LxmlElementClassLookup ]:
|
||||
* u"""ElementClassLookup(self)
|
||||
*/
|
||||
struct LxmlElementClassLookup {
|
||||
PyObject_HEAD
|
||||
_element_class_lookup_function _lookup_function;
|
||||
};
|
||||
|
||||
/* "src/lxml/classlookup.pxi":223
|
||||
* self._lookup_function = NULL # use default lookup
|
||||
*
|
||||
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
||||
* [ type LxmlFallbackElementClassLookupType,
|
||||
* object LxmlFallbackElementClassLookup ]:
|
||||
*/
|
||||
struct LxmlFallbackElementClassLookup {
|
||||
struct LxmlElementClassLookup __pyx_base;
|
||||
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
|
||||
struct LxmlElementClassLookup *fallback;
|
||||
_element_class_lookup_function _fallback_function;
|
||||
};
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
|
||||
#ifndef __PYX_EXTERN_C
|
||||
#ifdef __cplusplus
|
||||
#define __PYX_EXTERN_C extern "C"
|
||||
#else
|
||||
#define __PYX_EXTERN_C extern
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef DL_IMPORT
|
||||
#define DL_IMPORT(_T) _T
|
||||
#endif
|
||||
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
|
||||
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
|
||||
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElement) *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElementTree) *elementTreeFactory(struct LxmlElement *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElementTree) *newElementTree(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElement) *elementFactory(struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElement) *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElement) *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(void) setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlDocument) *documentOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(struct LxmlElement) *rootNodeOrRaise(PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) hasText(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) hasTail(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *textOf(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *tailOf(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) setNodeText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) setTailText(xmlNode *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *attributeValue(xmlNode *, xmlAttr *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *iterattributes(struct LxmlElement *, int);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *collectAttributes(xmlNode *, int);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) delAttribute(struct LxmlElement *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) hasChild(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNode) *findChild(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNode) *findChildForwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNode) *findChildBackwards(xmlNode *, Py_ssize_t);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNode) *nextElement(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNode) *previousElement(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(void) appendChild(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C DL_IMPORT(int) appendChildToElement(struct LxmlElement *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *pyunicode(const xmlChar *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *utf8(PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *getNsTag(PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *getNsTagWithEmptyNs(PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *namespacedName(xmlNode *);
|
||||
__PYX_EXTERN_C DL_IMPORT(PyObject) *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
|
||||
__PYX_EXTERN_C DL_IMPORT(void) iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
|
||||
__PYX_EXTERN_C DL_IMPORT(void) initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
|
||||
__PYX_EXTERN_C DL_IMPORT(xmlNs) *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
PyMODINIT_FUNC initetree(void);
|
||||
#else
|
||||
PyMODINIT_FUNC PyInit_etree(void);
|
||||
#endif
|
||||
|
||||
#endif /* !__PYX_HAVE__lxml__etree */
|
||||
230
lib/python3.5/site-packages/lxml/lxml.etree_api.h
Normal file
230
lib/python3.5/site-packages/lxml/lxml.etree_api.h
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
/* Generated by Cython 0.23.4 */
|
||||
|
||||
#ifndef __PYX_HAVE_API__lxml__etree
|
||||
#define __PYX_HAVE_API__lxml__etree
|
||||
#include "Python.h"
|
||||
#include "lxml.etree.h"
|
||||
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0;
|
||||
#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory
|
||||
static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeElement __pyx_api_f_4lxml_5etree_makeElement
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0;
|
||||
#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0;
|
||||
#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0;
|
||||
#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0;
|
||||
#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback
|
||||
static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches
|
||||
static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0;
|
||||
#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise
|
||||
static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0;
|
||||
#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0;
|
||||
#define hasText __pyx_api_f_4lxml_5etree_hasText
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0;
|
||||
#define hasTail __pyx_api_f_4lxml_5etree_hasTail
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0;
|
||||
#define textOf __pyx_api_f_4lxml_5etree_textOf
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0;
|
||||
#define tailOf __pyx_api_f_4lxml_5etree_tailOf
|
||||
static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0;
|
||||
#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText
|
||||
static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0;
|
||||
#define setTailText __pyx_api_f_4lxml_5etree_setTailText
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0;
|
||||
#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0;
|
||||
#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0;
|
||||
#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes
|
||||
static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0;
|
||||
#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0;
|
||||
#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute
|
||||
static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName
|
||||
static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0;
|
||||
#define hasChild __pyx_api_f_4lxml_5etree_hasChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChild __pyx_api_f_4lxml_5etree_findChild
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0;
|
||||
#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0;
|
||||
#define nextElement __pyx_api_f_4lxml_5etree_nextElement
|
||||
static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0;
|
||||
#define previousElement __pyx_api_f_4lxml_5etree_previousElement
|
||||
static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChild __pyx_api_f_4lxml_5etree_appendChild
|
||||
static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0;
|
||||
#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0;
|
||||
#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0;
|
||||
#define utf8 __pyx_api_f_4lxml_5etree_utf8
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0;
|
||||
#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0;
|
||||
#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0;
|
||||
#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName
|
||||
static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0;
|
||||
#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName
|
||||
static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0;
|
||||
#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext
|
||||
static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0;
|
||||
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
||||
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
||||
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
||||
#if !defined(__Pyx_PyIdentifier_FromString)
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s)
|
||||
#else
|
||||
#define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __PYX_HAVE_RT_ImportModule
|
||||
#define __PYX_HAVE_RT_ImportModule
|
||||
/* Import the module called `name`.
 *
 * Builds a Python string from `name`, hands it to PyImport_Import() and
 * returns that call's result unchanged.  Returns 0 if the name object
 * could not be created. */
static PyObject *__Pyx_ImportModule(const char *name) {
    PyObject *module_name;
    PyObject *module;

    module_name = __Pyx_PyIdentifier_FromString(name);
    if (!module_name)
        return 0;

    module = PyImport_Import(module_name);
    /* The name object is only needed for the duration of the import call. */
    Py_DECREF(module_name);
    return module;
}
|
||||
#endif
|
||||
|
||||
#ifndef __PYX_HAVE_RT_ImportFunction
|
||||
#define __PYX_HAVE_RT_ImportFunction
|
||||
/* Fetch the C function `funcname` exported by `module` and store it in *f.
 *
 * The function is looked up in the module's `__pyx_capi__` dict and must be
 * wrapped in a capsule (a CObject before Python 2.7) whose name/description
 * equals `sig`.
 *
 * Returns 0 on success.  Returns -1 if the attribute or entry is missing,
 * the signature differs, or the stored pointer is NULL; an ImportError or
 * TypeError is raised explicitly for a missing entry or wrong signature. */
static int __Pyx_ImportFunction(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
    PyObject *capi = 0;
    PyObject *entry = 0;
    /* Converts the capsule's data pointer to a function pointer without a
     * direct cast between the two pointer kinds. */
    union {
        void (*fp)(void);
        void *p;
    } conv;

    capi = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
    if (!capi)
        goto bad;

    /* `entry` is not released anywhere below; only `capi` is. */
    entry = PyDict_GetItemString(capi, funcname);
    if (!entry) {
        PyErr_Format(PyExc_ImportError,
            "%.200s does not export expected C function %.200s",
            PyModule_GetName(module), funcname);
        goto bad;
    }

#if PY_VERSION_HEX >= 0x02070000
    if (!PyCapsule_IsValid(entry, sig)) {
        PyErr_Format(PyExc_TypeError,
            "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
            PyModule_GetName(module), funcname, sig, PyCapsule_GetName(entry));
        goto bad;
    }
    conv.p = PyCapsule_GetPointer(entry, sig);
#else
    {
        const char *desc;
        const char *have;
        const char *want;

        desc = (const char *)PyCObject_GetDesc(entry);
        if (!desc)
            goto bad;
        /* Advance both strings in lockstep until they differ or the stored
         * description ends; they match only if both ended together. */
        for (have = desc, want = sig; *have != '\0' && *have == *want; have++, want++) {
        }
        if (*have != *want) {
            PyErr_Format(PyExc_TypeError,
                "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)",
                PyModule_GetName(module), funcname, sig, desc);
            goto bad;
        }
        conv.p = PyCObject_AsVoidPtr(entry);
    }
#endif

    *f = conv.fp;
    if (!(*f))
        goto bad;

    Py_DECREF(capi);
    return 0;

bad:
    Py_XDECREF(capi);
    return -1;
}
|
||||
#endif
|
||||
|
||||
|
||||
static int import_lxml__etree(void) {
|
||||
PyObject *module = 0;
|
||||
module = __Pyx_ImportModule("lxml.etree");
|
||||
if (!module) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
||||
if (__Pyx_ImportFunction(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
||||
Py_DECREF(module); module = 0;
|
||||
return 0;
|
||||
bad:
|
||||
Py_XDECREF(module);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#endif /* !__PYX_HAVE_API__lxml__etree */
|
||||
BIN
lib/python3.5/site-packages/lxml/objectify.cpython-35m-darwin.so
Executable file
BIN
lib/python3.5/site-packages/lxml/objectify.cpython-35m-darwin.so
Executable file
Binary file not shown.
3
lib/python3.5/site-packages/lxml/pyclasslookup.py
Normal file
3
lib/python3.5/site-packages/lxml/pyclasslookup.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# dummy module for backwards compatibility
|
||||
|
||||
from lxml.etree import PythonElementClassLookup
|
||||
248
lib/python3.5/site-packages/lxml/sax.py
Normal file
248
lib/python3.5/site-packages/lxml/sax.py
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
"""
|
||||
SAX-based adapter to copy trees from/to the Python standard library.
|
||||
|
||||
Use the `ElementTreeContentHandler` class to build an ElementTree from
|
||||
SAX events.
|
||||
|
||||
Use the `ElementTreeProducer` class or the `saxify()` function to fire
|
||||
the SAX events of an ElementTree against a SAX ContentHandler.
|
||||
|
||||
See https://lxml.de/sax.html
|
||||
"""
|
||||
|
||||
from xml.sax.handler import ContentHandler
|
||||
from lxml import etree
|
||||
from lxml.etree import ElementTree, SubElement
|
||||
from lxml.etree import Comment, ProcessingInstruction
|
||||
|
||||
class SaxError(etree.LxmlError):
    """Generic error raised by the SAX adapter in this module."""
|
||||
|
||||
def _getNsTag(tag):
|
||||
if tag[0] == '{':
|
||||
return tuple(tag[1:].split('}', 1))
|
||||
else:
|
||||
return (None, tag)
|
||||
|
||||
|
||||
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed the handler namespace-aware (``startElementNS``) or plain
    (``startElement``) SAX events, then read the result from the ``etree``
    property.

    ``makeelement`` is the factory used for the root element; it is called
    as ``makeelement(tag, attrib, nsmap)`` and defaults to ``etree.Element``.
    Child elements are always created with ``SubElement``.
    """
    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None entry keeps a permanent
        # "no default namespace" sentinel at the bottom.
        self._ns_mapping = { None : [None] }
        # Prefix mappings announced since the last element start; they
        # become that element's nsmap.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Ignore the locator; position information is not used."""
        pass

    def startDocument(self):
        """Do nothing; the tree is created lazily by the first element."""
        pass

    def endDocument(self):
        """Do nothing; the tree is available through ``etree``."""
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix mapping for the element that starts next."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping of *prefix*.

        Raises KeyError if *prefix* was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Restore the enclosing default namespace (None at top level).
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a SAX ``(uri, local)`` pair.

        A pair without a URI picks up the current default namespace, if any.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create the element for *ns_name* and push it on the open stack.

        *attributes* maps ``(uri, local)`` tuples to values; *qname* is not
        used.  The first element becomes the root and receives any
        processing instructions that were seen before it.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                # Python 2 mappings
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                      self._makeelement(el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element it is queued and later inserted in front of
        the root; inside an open element it is appended as a child; after
        the root has been closed it is added behind the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext') and hasattr(self._root, 'getnext'):
            # The root is already closed, so there is no open element to
            # append to (indexing the empty stack used to raise IndexError).
            # Attach behind the last node that follows the root so several
            # trailing instructions keep their document order.
            last = self._root
            following = last.getnext()
            while following is not None:
                last = following
                following = last.getnext()
            last.addnext(pi)
        # Roots built by a custom makeelement without sibling support drop
        # trailing instructions, matching how leading ones are handled.

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if *ns_name* does not match that element's tag.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant: forwards to ``startElementNS``."""
        if attributes:
            attributes = dict(
                    [((None, k), v) for k, v in attributes.items()]
                )
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant: forwards to ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element.

        The text goes to the tail of the element's last child if it has one,
        otherwise to the element's own text.
        """
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
|
||||
|
||||
|
||||
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    *element_or_tree* may be an element or anything with a ``getroot()``
    method; *content_handler* receives the namespace-aware SAX callbacks.
    Call ``saxify()`` to fire the events.
    """
    def __init__(self, element_or_tree, content_handler):
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            # Not a tree: treat the argument as the element itself.
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # Shared attributes object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the events for the whole document.

        Processing instructions that directly precede or follow the element
        are reported before and after it, in document order.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # Collected nearest-first; report them in document order.
            for sibling in siblings[::-1]:
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the events for *element*, its subtree and its tail text.

        *prefixes* maps namespace URIs to the prefixes that are in scope at
        this point.  It is not modified: prefixes introduced for this
        element are visible to its descendants only.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            # Comments have no ContentHandler event; only their tail is sent.
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # Work on a private copy.  Writing into the caller's dict would keep
        # a prefix registered after its endPrefixMapping, so a later sibling
        # in the same namespace would use the prefix without it being
        # declared again.
        prefixes = dict(prefixes)

        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for ``(ns_uri, local_name)``.

        A name without a namespace is returned unchanged.  Otherwise the
        prefix registered for *ns_uri* in *prefixes* is used; if there is
        none, a new ``nsNN`` prefix is generated, stored in *prefixes* and
        appended to *new_prefixes* as ``(prefix, ns_uri)``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append( (prefix, ns_uri) )
        return prefix + ':' + local_name
|
||||
|
||||
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds a one-off ElementTreeProducer for
    *element_or_tree* and runs it against *content_handler*.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
|
||||
13
lib/python3.5/site-packages/lxml/usedoctest.py
Normal file
13
lib/python3.5/site-packages/lxml/usedoctest.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Doctest module for XML comparison.
|
||||
|
||||
Usage::
|
||||
|
||||
>>> import lxml.usedoctest
|
||||
>>> # now do your XML doctests ...
|
||||
|
||||
See `lxml.doctestcompare`
|
||||
"""
|
||||
|
||||
from lxml import doctestcompare
|
||||
|
||||
# Runs at import time: per the module docstring, using this module means
# nothing more than ``import lxml.usedoctest``.  This module's own name is
# passed as ``del_module`` -- presumably so temp_install can remove it from
# sys.modules afterwards; see lxml.doctestcompare to confirm.
doctestcompare.temp_install(del_module=__name__)
|
||||
Loading…
Add table
Add a link
Reference in a new issue