update to python3.7
This commit is contained in:
parent
da2d24a7f4
commit
80c4a755da
2912 changed files with 206832 additions and 100407 deletions
288
lib/python3.7/site-packages/asn1crypto/_iri.py
Normal file
288
lib/python3.7/site-packages/asn1crypto/_iri.py
Normal file
|
|
@ -0,0 +1,288 @@
|
|||
# coding: utf-8
|
||||
|
||||
"""
|
||||
Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports
|
||||
the following items:
|
||||
|
||||
- iri_to_uri()
|
||||
- uri_to_iri()
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
|
||||
from encodings import idna # noqa
|
||||
import codecs
|
||||
import re
|
||||
import sys
|
||||
|
||||
from ._errors import unwrap
|
||||
from ._types import byte_cls, str_cls, type_name, bytes_to_list, int_types
|
||||
|
||||
if sys.version_info < (3,):
|
||||
from urlparse import urlsplit, urlunsplit
|
||||
from urllib import (
|
||||
quote as urlquote,
|
||||
unquote as unquote_to_bytes,
|
||||
)
|
||||
|
||||
else:
|
||||
from urllib.parse import (
|
||||
quote as urlquote,
|
||||
unquote_to_bytes,
|
||||
urlsplit,
|
||||
urlunsplit,
|
||||
)
|
||||
|
||||
|
||||
def iri_to_uri(value):
    """
    Normalizes and encodes a unicode IRI into an ASCII byte string URI

    The host is IDNA-encoded, the remaining components are percent-encoded
    via _urlquote(), and the default port for http (80) and https (443) is
    dropped.

    :param value:
        A unicode string of an IRI

    :raises:
        TypeError - when value is not a unicode string

    :return:
        A byte string of the ASCII-encoded URI
    """

    if not isinstance(value, str_cls):
        raise TypeError(unwrap(
            '''
            value must be a unicode string, not %s
            ''',
            type_name(value)
        ))

    scheme = None
    # Python 2.6 doesn't split properly if the URL doesn't start with http:// or https://
    if sys.version_info < (2, 7) and not value.startswith('http://') and not value.startswith('https://'):
        real_prefix = None
        prefix_match = re.match('^[^:]*://', value)
        if prefix_match:
            # Temporarily swap the real scheme for http:// so urlsplit()
            # parses the remainder correctly
            real_prefix = prefix_match.group(0)
            value = 'http://' + value[len(real_prefix):]
        parsed = urlsplit(value)
        if real_prefix:
            # Restore the original scheme (7 == len('http://'))
            value = real_prefix + value[7:]
            scheme = _urlquote(real_prefix[:-3])
    else:
        parsed = urlsplit(value)

    if scheme is None:
        scheme = _urlquote(parsed.scheme)
    hostname = parsed.hostname
    if hostname is not None:
        hostname = hostname.encode('idna')
        # urlsplit() strips the square brackets from an IP literal, and a
        # colon can only occur in the host of an IP literal. The brackets
        # must be put back or the host becomes ambiguous with the port.
        if b':' in hostname:
            hostname = b'[' + hostname + b']'
    # RFC 3986 allows userinfo to contain sub-delims
    username = _urlquote(parsed.username, safe='!$&\'()*+,;=')
    password = _urlquote(parsed.password, safe='!$&\'()*+,;=')
    port = parsed.port
    if port is not None:
        port = str_cls(port).encode('ascii')

    netloc = b''
    if username is not None:
        netloc += username
        if password:
            netloc += b':' + password
        netloc += b'@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        # Omit the port when it is the default for the scheme
        default_http = scheme == b'http' and port == b'80'
        default_https = scheme == b'https' and port == b'443'
        if not default_http and not default_https:
            netloc += b':' + port

    # RFC 3986 allows a path to contain sub-delims, plus "@" and ":"
    path = _urlquote(parsed.path, safe='/!$&\'()*+,;=@:')
    # RFC 3986 allows the query to contain sub-delims, plus "@", ":" , "/" and "?"
    query = _urlquote(parsed.query, safe='/?!$&\'()*+,;=@:')
    # RFC 3986 allows the fragment to contain sub-delims, plus "@", ":" , "/" and "?"
    fragment = _urlquote(parsed.fragment, safe='/?!$&\'()*+,;=@:')

    # A lone "/" path with nothing after it is normalized away
    if query is None and fragment is None and path == b'/':
        path = None

    # Python 2.7 compat
    if path is None:
        path = ''

    output = urlunsplit((scheme, netloc, path, query, fragment))
    if isinstance(output, str_cls):
        output = output.encode('latin1')
    return output
|
||||
|
||||
|
||||
def uri_to_iri(value):
    """
    Converts an ASCII URI byte string into a unicode IRI

    The host is IDNA-decoded and the remaining components are
    percent-decoded via _urlunquote(). Characters that would change the
    structure of the component they appear in are kept in %XX form.

    :param value:
        An ASCII-encoded byte string of the URI

    :raises:
        TypeError - when value is not a byte string

    :return:
        A unicode string of the IRI
    """

    if not isinstance(value, byte_cls):
        raise TypeError(unwrap(
            '''
            value must be a byte string, not %s
            ''',
            type_name(value)
        ))

    parsed = urlsplit(value)

    scheme = parsed.scheme
    if scheme is not None:
        scheme = scheme.decode('ascii')

    # ":" and "@" delimit the userinfo, so decoded occurrences stay escaped
    username = _urlunquote(parsed.username, remap=[':', '@'])
    password = _urlunquote(parsed.password, remap=[':', '@'])
    hostname = parsed.hostname
    if hostname:
        hostname = hostname.decode('idna')
        # urlsplit() strips the square brackets from an IP literal, and a
        # colon can only occur in the host of an IP literal. The brackets
        # must be put back or the host becomes ambiguous with the port.
        if ':' in hostname:
            hostname = '[' + hostname + ']'
    port = parsed.port
    if port and not isinstance(port, int_types):
        port = port.decode('ascii')

    netloc = ''
    if username is not None:
        netloc += username
        if password:
            netloc += ':' + password
        netloc += '@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        netloc += ':' + str_cls(port)

    # Literal delimiters are preserved, while percent-encoded ones remain
    # encoded so the structure of the path and query is unchanged
    path = _urlunquote(parsed.path, remap=['/'], preserve=True)
    query = _urlunquote(parsed.query, remap=['&', '='], preserve=True)
    fragment = _urlunquote(parsed.fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))
|
||||
|
||||
|
||||
def _iri_utf8_errors_handler(exc):
|
||||
"""
|
||||
Error handler for decoding UTF-8 parts of a URI into an IRI. Leaves byte
|
||||
sequences encoded in %XX format, but as part of a unicode string.
|
||||
|
||||
:param exc:
|
||||
The UnicodeDecodeError exception
|
||||
|
||||
:return:
|
||||
A 2-element tuple of (replacement unicode string, integer index to
|
||||
resume at)
|
||||
"""
|
||||
|
||||
bytes_as_ints = bytes_to_list(exc.object[exc.start:exc.end])
|
||||
replacements = ['%%%02x' % num for num in bytes_as_ints]
|
||||
return (''.join(replacements), exc.end)
|
||||
|
||||
|
||||
# Register the handler under the error-handler name 'iriutf8', which is the
# name passed to the .decode('utf-8', 'iriutf8') calls in this module
codecs.register_error('iriutf8', _iri_utf8_errors_handler)
|
||||
|
||||
|
||||
def _urlquote(string, safe=''):
|
||||
"""
|
||||
Quotes a unicode string for use in a URL
|
||||
|
||||
:param string:
|
||||
A unicode string
|
||||
|
||||
:param safe:
|
||||
A unicode string of character to not encode
|
||||
|
||||
:return:
|
||||
None (if string is None) or an ASCII byte string of the quoted string
|
||||
"""
|
||||
|
||||
if string is None or string == '':
|
||||
return None
|
||||
|
||||
# Anything already hex quoted is pulled out of the URL and unquoted if
|
||||
# possible
|
||||
escapes = []
|
||||
if re.search('%[0-9a-fA-F]{2}', string):
|
||||
# Try to unquote any percent values, restoring them if they are not
|
||||
# valid UTF-8. Also, requote any safe chars since encoded versions of
|
||||
# those are functionally different than the unquoted ones.
|
||||
def _try_unescape(match):
|
||||
byte_string = unquote_to_bytes(match.group(0))
|
||||
unicode_string = byte_string.decode('utf-8', 'iriutf8')
|
||||
for safe_char in list(safe):
|
||||
unicode_string = unicode_string.replace(safe_char, '%%%02x' % ord(safe_char))
|
||||
return unicode_string
|
||||
string = re.sub('(?:%[0-9a-fA-F]{2})+', _try_unescape, string)
|
||||
|
||||
# Once we have the minimal set of hex quoted values, removed them from
|
||||
# the string so that they are not double quoted
|
||||
def _extract_escape(match):
|
||||
escapes.append(match.group(0).encode('ascii'))
|
||||
return '\x00'
|
||||
string = re.sub('%[0-9a-fA-F]{2}', _extract_escape, string)
|
||||
|
||||
output = urlquote(string.encode('utf-8'), safe=safe.encode('utf-8'))
|
||||
if not isinstance(output, byte_cls):
|
||||
output = output.encode('ascii')
|
||||
|
||||
# Restore the existing quoted values that we extracted
|
||||
if len(escapes) > 0:
|
||||
def _return_escape(_):
|
||||
return escapes.pop(0)
|
||||
output = re.sub(b'%00', _return_escape, output)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def _urlunquote(byte_string, remap=None, preserve=None):
|
||||
"""
|
||||
Unquotes a URI portion from a byte string into unicode using UTF-8
|
||||
|
||||
:param byte_string:
|
||||
A byte string of the data to unquote
|
||||
|
||||
:param remap:
|
||||
A list of characters (as unicode) that should be re-mapped to a
|
||||
%XX encoding. This is used when characters are not valid in part of a
|
||||
URL.
|
||||
|
||||
:param preserve:
|
||||
A bool - indicates that the chars to be remapped if they occur in
|
||||
non-hex form, should be preserved. E.g. / for URL path.
|
||||
|
||||
:return:
|
||||
A unicode string
|
||||
"""
|
||||
|
||||
if byte_string is None:
|
||||
return byte_string
|
||||
|
||||
if byte_string == b'':
|
||||
return ''
|
||||
|
||||
if preserve:
|
||||
replacements = ['\x1A', '\x1C', '\x1D', '\x1E', '\x1F']
|
||||
preserve_unmap = {}
|
||||
for char in remap:
|
||||
replacement = replacements.pop(0)
|
||||
preserve_unmap[replacement] = char
|
||||
byte_string = byte_string.replace(char.encode('ascii'), replacement.encode('ascii'))
|
||||
|
||||
byte_string = unquote_to_bytes(byte_string)
|
||||
|
||||
if remap:
|
||||
for char in remap:
|
||||
byte_string = byte_string.replace(char.encode('ascii'), ('%%%02x' % ord(char)).encode('ascii'))
|
||||
|
||||
output = byte_string.decode('utf-8', 'iriutf8')
|
||||
|
||||
if preserve:
|
||||
for replacement, original in preserve_unmap.items():
|
||||
output = output.replace(replacement, original)
|
||||
|
||||
return output
|
||||
Loading…
Add table
Add a link
Reference in a new issue