update shared dependencies

This commit is contained in:
j 2016-02-23 11:36:55 +05:30
commit 736cd598a8
521 changed files with 45146 additions and 22574 deletions

View file

@ -1,17 +1,9 @@
# urllib3/__init__.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
urllib3 - Thread-safe connection pooling and re-using.
"""
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
__version__ = 'dev'
from __future__ import absolute_import
import warnings
from .connectionpool import (
HTTPConnectionPool,
@ -23,7 +15,10 @@ from . import exceptions
from .filepost import encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import HTTPResponse
from .util import make_headers, get_host, Timeout
from .util.request import make_headers
from .util.url import get_host
from .util.timeout import Timeout
from .util.retry import Retry
# Set default logging handler to avoid "No handler found" warnings.
@ -35,8 +30,30 @@ except ImportError:
def emit(self, record):
pass
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
__version__ = '1.13.1'
__all__ = (
'HTTPConnectionPool',
'HTTPSConnectionPool',
'PoolManager',
'ProxyManager',
'HTTPResponse',
'Retry',
'Timeout',
'add_stderr_logger',
'connection_from_url',
'disable_warnings',
'encode_multipart_formdata',
'get_host',
'make_headers',
'proxy_from_url',
)
logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(level=logging.DEBUG):
"""
Helper for quickly adding a StreamHandler to the logger. Useful for
@ -51,8 +68,26 @@ def add_stderr_logger(level=logging.DEBUG):
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
logger.addHandler(handler)
logger.setLevel(level)
logger.debug('Added an stderr logging handler to logger: %s' % __name__)
logger.debug('Added a stderr logging handler to logger: %s' % __name__)
return handler
# ... Clean up.
del NullHandler
# SecurityWarning's always go off by default.
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
# SubjectAltNameWarning's should go off once per host
warnings.simplefilter('default', exceptions.SubjectAltNameWarning)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
append=True)
# SNIMissingWarnings should go off only once.
warnings.simplefilter('default', exceptions.SNIMissingWarning)
def disable_warnings(category=exceptions.HTTPWarning):
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter('ignore', category)

View file

@ -1,13 +1,8 @@
# urllib3/_collections.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
except ImportError: # Platform-specific: No threads available
class RLock:
def __enter__(self):
pass
@ -16,11 +11,11 @@ except ImportError: # Platform-specific: No threads available
pass
try: # Python 2.7+
try: # Python 2.7+
from collections import OrderedDict
except ImportError:
from .packages.ordered_dict import OrderedDict
from .packages.six import itervalues
from .packages.six import iterkeys, itervalues, PY3
__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
@ -91,8 +86,7 @@ class RecentlyUsedContainer(MutableMapping):
def clear(self):
with self.lock:
# Copy pointers to all values, then wipe the mapping
# under Python 2, this copies the list of values twice :-|
values = list(self._container.values())
values = list(itervalues(self._container))
self._container.clear()
if self.dispose_func:
@ -101,7 +95,7 @@ class RecentlyUsedContainer(MutableMapping):
def keys(self):
with self.lock:
return self._container.keys()
return list(iterkeys(self._container))
class HTTPHeaderDict(MutableMapping):
@ -116,7 +110,7 @@ class HTTPHeaderDict(MutableMapping):
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
RFC 2616. Iteration provides the first case-sensitive key seen for each
RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
@ -136,25 +130,82 @@ class HTTPHeaderDict(MutableMapping):
'foo=bar, baz=quxx'
>>> headers['Content-Length']
'7'
If you want to access the raw headers with their original casing
for debugging purposes you can access the private ``._data`` attribute
which is a normal python ``dict`` that maps the case-insensitive key to a
list of tuples stored as (case-sensitive-original-name, value). Using the
structure from above as our example:
>>> headers._data
{'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
'content-length': [('content-length', '7')]}
"""
def __init__(self, headers=None, **kwargs):
self._data = {}
if headers is None:
headers = {}
self.update(headers, **kwargs)
super(HTTPHeaderDict, self).__init__()
self._container = {}
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
else:
self.extend(headers)
if kwargs:
self.extend(kwargs)
def add(self, key, value):
def __setitem__(self, key, val):
self._container[key.lower()] = (key, val)
return self._container[key.lower()]
def __getitem__(self, key):
val = self._container[key.lower()]
return ', '.join(val[1:])
def __delitem__(self, key):
del self._container[key.lower()]
def __contains__(self, key):
return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
return (dict((k.lower(), v) for k, v in self.itermerged()) ==
dict((k.lower(), v) for k, v in other.itermerged()))
def __ne__(self, other):
return not self.__eq__(other)
if not PY3: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
return len(self._container)
def __iter__(self):
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def pop(self, key, default=__marker):
'''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
'''
# Using the MutableMapping function directly fails due to the private marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
try:
del self[key]
except KeyError:
pass
def add(self, key, val):
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
@ -163,43 +214,111 @@ class HTTPHeaderDict(MutableMapping):
>>> headers['foo']
'bar, baz'
"""
self._data.setdefault(key.lower(), []).append((key, value))
key_lower = key.lower()
new_vals = key, val
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# new_vals was not inserted, as there was a previous one
if isinstance(vals, list):
# If already several items got inserted, we have a list
vals.append(val)
else:
# vals should be a tuple then, i.e. only one item so far
# Need to convert the tuple to list for further extension
self._container[key_lower] = [vals[0], vals[1], val]
def extend(self, *args, **kwargs):
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError("extend() takes at most 1 positional "
"arguments ({0} given)".format(len(args)))
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, Mapping):
for key in other:
self.add(key, other[key])
elif hasattr(other, "keys"):
for key in other.keys():
self.add(key, other[key])
else:
for key, value in other:
self.add(key, value)
for key, value in kwargs.items():
self.add(key, value)
def getlist(self, key):
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
return self[key].split(', ') if key in self else []
try:
vals = self._container[key.lower()]
except KeyError:
return []
else:
if isinstance(vals, tuple):
return [vals[1]]
else:
return vals[1:]
def copy(self):
h = HTTPHeaderDict()
for key in self._data:
for rawkey, value in self._data[key]:
h.add(rawkey, value)
return h
def __eq__(self, other):
if not isinstance(other, Mapping):
return False
other = HTTPHeaderDict(other)
return dict((k1, self[k1]) for k1 in self._data) == \
dict((k2, other[k2]) for k2 in other._data)
def __getitem__(self, key):
values = self._data[key.lower()]
return ', '.join(value[1] for value in values)
def __setitem__(self, key, value):
self._data[key.lower()] = [(key, value)]
def __delitem__(self, key):
del self._data[key.lower()]
def __len__(self):
return len(self._data)
def __iter__(self):
for headers in itervalues(self._data):
yield headers[0][0]
# Backwards compatibility for httplib
getheaders = getlist
getallmatchingheaders = getlist
iget = getlist
def __repr__(self):
return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
def _copy_from(self, other):
for key in other:
val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
def copy(self):
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self):
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ', '.join(val[1:])
def items(self):
return list(self.iteritems())
@classmethod
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
headers = []
for line in message.headers:
if line.startswith((' ', '\t')):
key, value = headers[-1]
headers[-1] = (key, value + '\r\n' + line.rstrip())
continue
key, value = line.split(':', 1)
headers.append((key, value.strip()))
return cls(headers)

View file

@ -1,95 +1,149 @@
# urllib3/connection.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
import datetime
import os
import sys
import socket
from socket import timeout as SocketTimeout
from socket import error as SocketError, timeout as SocketTimeout
import warnings
from .packages import six
try: # Python 3
from http.client import HTTPConnection as _HTTPConnection, HTTPException
try: # Python 3
from http.client import HTTPConnection as _HTTPConnection
from http.client import HTTPException # noqa: unused in this module
except ImportError:
from httplib import HTTPConnection as _HTTPConnection, HTTPException
from httplib import HTTPConnection as _HTTPConnection
from httplib import HTTPException # noqa: unused in this module
class DummyConnection(object):
"Used to detect a failed ConnectionCls import."
pass
try: # Compiled with SSL?
try: # Compiled with SSL?
import ssl
BaseSSLError = ssl.SSLError
except (ImportError, AttributeError): # Platform-specific: No SSL.
ssl = None
HTTPSConnection = DummyConnection
class BaseSSLError(BaseException):
pass
try: # Python 3
from http.client import HTTPSConnection as _HTTPSConnection
except ImportError:
from httplib import HTTPSConnection as _HTTPSConnection
import ssl
BaseSSLError = ssl.SSLError
try: # Python 3:
# Not a no-op, we're adding this to the namespace so it can be imported.
ConnectionError = ConnectionError
except NameError: # Python 2:
class ConnectionError(Exception):
pass
except (ImportError, AttributeError): # Platform-specific: No SSL.
pass
from .exceptions import (
NewConnectionError,
ConnectTimeoutError,
SubjectAltNameWarning,
SystemTimeWarning,
)
from .packages.ssl_match_hostname import match_hostname
from .packages import six
from .util import (
assert_fingerprint,
from .util.ssl_ import (
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
assert_fingerprint,
)
from .util import connection
port_by_scheme = {
'http': 80,
'https': 443,
}
RECENT_DATE = datetime.date(2014, 1, 1)
class DummyConnection(object):
"""Used to detect a failed ConnectionCls import."""
pass
class HTTPConnection(_HTTPConnection, object):
"""
Based on httplib.HTTPConnection but provides an extra constructor
backwards-compatibility layer between older and newer Pythons.
Additional keyword parameters are used to configure attributes of the connection.
Accepted parameters include:
- ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
- ``source_address``: Set the source address for the current connection.
.. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x
- ``socket_options``: Set specific options on the underlying socket. If not specified, then
defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.
For example, if you wish to enable TCP Keep Alive in addition to the defaults,
you might pass::
HTTPConnection.default_socket_options + [
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
]
Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
"""
default_port = port_by_scheme['http']
# By default, disable Nagle's Algorithm.
tcp_nodelay = 1
#: Disable Nagle's algorithm by default.
#: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
#: Whether this connection verifies the host's certificate.
is_verified = False
def __init__(self, *args, **kw):
if six.PY3: # Python 3
kw.pop('strict', None)
if sys.version_info < (2, 7): # Python 2.6 and older
kw.pop('source_address', None)
# Pre-set source_address in case we have an older Python like 2.6.
self.source_address = kw.get('source_address')
if sys.version_info < (2, 7): # Python 2.6
# _HTTPConnection on Python 2.6 will balk at this keyword arg, but
# not newer versions. We can still use it when creating a
# connection though, so we pop it *after* we have saved it as
# self.source_address.
kw.pop('source_address', None)
#: The socket options provided by the user. If no options are
#: provided, we use the default options.
self.socket_options = kw.pop('socket_options', self.default_socket_options)
# Superclass also sets self.source_address in Python 2.7+.
_HTTPConnection.__init__(self, *args, **kw)
_HTTPConnection.__init__(self, *args, **kw)
def _new_conn(self):
""" Establish a socket connection and set nodelay settings on it.
:return: a new socket connection
:return: New socket connection.
"""
extra_args = []
if self.source_address: # Python 2.7+
extra_args.append(self.source_address)
extra_kw = {}
if self.source_address:
extra_kw['source_address'] = self.source_address
conn = socket.create_connection(
(self.host, self.port), self.timeout, *extra_args)
conn.setsockopt(
socket.IPPROTO_TCP, socket.TCP_NODELAY, self.tcp_nodelay)
if self.socket_options:
extra_kw['socket_options'] = self.socket_options
try:
conn = connection.create_connection(
(self.host, self.port), self.timeout, **extra_kw)
except SocketTimeout as e:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
except SocketError as e:
raise NewConnectionError(
self, "Failed to establish a new connection: %s" % e)
return conn
@ -101,6 +155,8 @@ class HTTPConnection(_HTTPConnection, object):
if getattr(self, '_tunnel_host', None):
# TODO: Fix tunnel so it doesn't depend on self.sock state.
self._tunnel()
# Mark this connection as not reusable
self.auto_open = 0
def connect(self):
conn = self._new_conn()
@ -136,34 +192,29 @@ class VerifiedHTTPSConnection(HTTPSConnection):
"""
cert_reqs = None
ca_certs = None
ca_cert_dir = None
ssl_version = None
conn_kw = {}
assert_fingerprint = None
def set_cert(self, key_file=None, cert_file=None,
cert_reqs=None, ca_certs=None,
assert_hostname=None, assert_fingerprint=None):
assert_hostname=None, assert_fingerprint=None,
ca_cert_dir=None):
if (ca_certs or ca_cert_dir) and cert_reqs is None:
cert_reqs = 'CERT_REQUIRED'
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
def connect(self):
# Add certificate verification
try:
sock = socket.create_connection(
address=(self.host, self.port), timeout=self.timeout,
**self.conn_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, self.timeout))
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY,
self.tcp_nodelay)
conn = self._new_conn()
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)
@ -173,32 +224,65 @@ class VerifiedHTTPSConnection(HTTPSConnection):
# _tunnel_host was added in Python 2.6.3
# (See: http://hg.python.org/cpython/rev/0f57b30a152f)
self.sock = sock
self.sock = conn
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()
# Mark this connection as not reusable
self.auto_open = 0
# Override the host with the one we're requesting data from.
hostname = self._tunnel_host
is_time_off = datetime.date.today() < RECENT_DATE
if is_time_off:
warnings.warn((
'System time is way off (before {0}). This will probably '
'lead to SSL verification errors').format(RECENT_DATE),
SystemTimeWarning
)
# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
cert_reqs=resolved_cert_reqs,
ca_certs=self.ca_certs,
ca_cert_dir=self.ca_cert_dir,
server_hostname=hostname,
ssl_version=resolved_ssl_version)
if resolved_cert_reqs != ssl.CERT_NONE:
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
elif self.assert_hostname is not False:
match_hostname(self.sock.getpeercert(),
self.assert_hostname or hostname)
if self.assert_fingerprint:
assert_fingerprint(self.sock.getpeercert(binary_form=True),
self.assert_fingerprint)
elif resolved_cert_reqs != ssl.CERT_NONE \
and self.assert_hostname is not False:
cert = self.sock.getpeercert()
if not cert.get('subjectAltName', ()):
warnings.warn((
'Certificate for {0} has no `subjectAltName`, falling back to check for a '
'`commonName` for now. This feature is being removed by major browsers and '
'deprecated by RFC 2818. (See https://github.com/shazow/urllib3/issues/497 '
'for details.)'.format(hostname)),
SubjectAltNameWarning
)
# In case the hostname is an IPv6 address, strip the square
# brackets from it before using it to validate. This is because
# a certificate with an IPv6 address in it won't have square
# brackets around that address. Sadly, match_hostname won't do this
# for us: it expects the plain host part without any extra work
# that might have been done to make it palatable to httplib.
asserted_hostname = self.assert_hostname or hostname
asserted_hostname = asserted_hostname.strip('[]')
match_hostname(cert, asserted_hostname)
self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED or
self.assert_fingerprint is not None)
if ssl:
# Make a copy for testing.
UnverifiedHTTPSConnection = HTTPSConnection
HTTPSConnection = VerifiedHTTPSConnection
else:
HTTPSConnection = DummyConnection

View file

@ -1,35 +1,34 @@
# urllib3/connectionpool.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import sys
from __future__ import absolute_import
import errno
import logging
import sys
import warnings
from socket import error as SocketError, timeout as SocketTimeout
import socket
try: # Python 3
try: # Python 3
from queue import LifoQueue, Empty, Full
except ImportError:
from Queue import LifoQueue, Empty, Full
import Queue as _ # Platform-specific: Windows
# Queue is imported for side effects on MS Windows
import Queue as _unused_module_Queue # noqa: unused
from .exceptions import (
ClosedPoolError,
ConnectionError,
ConnectTimeoutError,
ProtocolError,
EmptyPoolError,
HeaderParsingError,
HostChangedError,
LocationParseError,
LocationValueError,
MaxRetryError,
ProxyError,
ReadTimeoutError,
SSLError,
TimeoutError,
ReadTimeoutError,
ProxyError,
InsecureRequestWarning,
NewConnectionError,
)
from .packages.ssl_match_hostname import CertificateError
from .packages import six
@ -41,11 +40,12 @@ from .connection import (
)
from .request import RequestMethods
from .response import HTTPResponse
from .util import (
get_host,
is_connection_dropped,
Timeout,
)
from .util.connection import is_connection_dropped
from .util.response import assert_header_parsing
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import get_host, Url
xrange = six.moves.xrange
@ -54,8 +54,8 @@ log = logging.getLogger(__name__)
_Default = object()
## Pool objects
# Pool objects
class ConnectionPool(object):
"""
Base class for all connection pools, such as
@ -66,11 +66,8 @@ class ConnectionPool(object):
QueueCls = LifoQueue
def __init__(self, host, port=None):
if host is None:
raise LocationParseError(host)
# httplib doesn't like it when we include brackets in ipv6 addresses
host = host.strip('[]')
if not host:
raise LocationValueError("No host specified.")
self.host = host
self.port = port
@ -79,9 +76,25 @@ class ConnectionPool(object):
return '%s(host=%r, port=%r)' % (type(self).__name__,
self.host, self.port)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
# Return False to re-raise any potential exceptions
return False
def close():
"""
Close all pooled connections and disable the pool.
"""
pass
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = set([errno.EAGAIN, errno.EWOULDBLOCK])
class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
Thread-safe connection pool for one host.
@ -111,7 +124,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param maxsize:
Number of connections to save that can be reused. More than 1 is useful
in multithreaded situations. If ``block`` is set to false, more
in multithreaded situations. If ``block`` is set to False, more
connections will be created but they will not be saved once they've
been used.
@ -126,6 +139,9 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
Headers to include with all requests, unless other headers are given
explicitly.
:param retries:
Retry configuration to use by default with requests in this pool.
:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.connectionpool.ProxyManager`"
@ -133,6 +149,10 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.connectionpool.ProxyManager`"
:param \**conn_kw:
Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
:class:`urllib3.connection.HTTPSConnection` instances.
"""
scheme = 'http'
@ -140,18 +160,22 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
def __init__(self, host, port=None, strict=False,
timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1, block=False,
headers=None, _proxy=None, _proxy_headers=None, **conn_kw):
headers=None, retries=None,
_proxy=None, _proxy_headers=None,
**conn_kw):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)
self.strict = strict
# This is for backwards compatibility and can be removed once a timeout
# can only be set to a Timeout object
if not isinstance(timeout, Timeout):
timeout = Timeout.from_float(timeout)
if retries is None:
retries = Retry.DEFAULT
self.timeout = timeout
self.retries = retries
self.pool = self.QueueCls(maxsize)
self.block = block
@ -166,11 +190,14 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# These are mostly for testing and debugging purposes.
self.num_connections = 0
self.num_requests = 0
if sys.version_info < (2, 7): # Python 2.6 and older
conn_kw.pop('source_address', None)
self.conn_kw = conn_kw
if self.proxy:
# Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
# We cannot know if the user has added default socket options, so we cannot replace the
# list.
self.conn_kw.setdefault('socket_options', [])
def _new_conn(self):
"""
Return a fresh :class:`HTTPConnection`.
@ -182,10 +209,6 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn = self.ConnectionCls(host=self.host, port=self.port,
timeout=self.timeout.connect_timeout,
strict=self.strict, **self.conn_kw)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
return conn
def _get_conn(self, timeout=None):
@ -204,7 +227,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
try:
conn = self.pool.get(block=self.block, timeout=timeout)
except AttributeError: # self.pool is None
except AttributeError: # self.pool is None
raise ClosedPoolError(self, "Pool is closed.")
except Empty:
@ -218,6 +241,11 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if conn and is_connection_dropped(conn):
log.info("Resetting dropped connection: %s" % self.host)
conn.close()
if getattr(conn, 'auto_open', 1) == 0:
# This is a proxied connection that has been mutated by
# httplib._tunnel() and cannot be reused (since it would
# attempt to bypass the proxy)
conn = None
return conn or self._new_conn()
@ -237,7 +265,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
"""
try:
self.pool.put(conn, block=False)
return # Everything is dandy, done.
return # Everything is dandy, done.
except AttributeError:
# self.pool is None.
pass
@ -251,6 +279,16 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if conn:
conn.close()
def _validate_conn(self, conn):
"""
Called right before a request is made, after the socket is created.
"""
pass
def _prepare_proxy(self, conn):
# Nothing to do for HTTP connections.
pass
def _get_timeout(self, timeout):
""" Helper that always returns a :class:`urllib3.util.Timeout` """
if timeout is _Default:
@ -263,6 +301,23 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# can be removed later
return Timeout.from_float(timeout)
def _raise_timeout(self, err, url, timeout_value):
"""Is the error actually a timeout? Will raise a ReadTimeout or pass"""
if isinstance(err, SocketTimeout):
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
# See the above comment about EAGAIN in Python 3. In Python 2 we have
# to specifically catch it and throw the timeout error
if hasattr(err, 'errno') and err.errno in _blocking_errnos:
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
# Catch possible read timeouts thrown as SSL errors. If not the
# case, rethrow the original. We need to do this because of:
# http://bugs.python.org/issue10272
if 'timed out' in str(err) or 'did not complete (read)' in str(err): # Python 2.6
raise ReadTimeoutError(self, url, "Read timed out. (read timeout=%s)" % timeout_value)
def _make_request(self, conn, method, url, timeout=_Default,
**httplib_request_kw):
"""
@ -282,23 +337,26 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
self.num_requests += 1
timeout_obj = self._get_timeout(timeout)
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# Trigger any extra validation we need to do.
try:
timeout_obj.start_connect()
conn.timeout = timeout_obj.connect_timeout
# conn.request() calls httplib.*.request, not the method in
# urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
except SocketTimeout:
raise ConnectTimeoutError(
self, "Connection to %s timed out. (connect timeout=%s)" %
(self.host, timeout_obj.connect_timeout))
self._validate_conn(conn)
except (SocketTimeout, BaseSSLError) as e:
# Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
raise
# conn.request() calls httplib.*.request, not the method in
# urllib3.request. It also calls makefile (recv) on the socket.
conn.request(method, url, **httplib_request_kw)
# Reset the timeout for the recv() on the socket
read_timeout = timeout_obj.read_timeout
# App Engine doesn't have a sock attr
if hasattr(conn, 'sock'):
if getattr(conn, 'sock', None):
# In Python 3 socket.py will catch EAGAIN and return None when you
# try and read into the file pointer created by http.client, which
# instead raises a BadStatusLine exception. Instead of catching
@ -306,41 +364,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
# timeouts, check for a zero timeout before making the request.
if read_timeout == 0:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
if read_timeout is Timeout.DEFAULT_TIMEOUT:
conn.sock.settimeout(socket.getdefaulttimeout())
else: # None or a value
else: # None or a value
conn.sock.settimeout(read_timeout)
# Receive the response from the server
try:
try: # Python 2.7+, use buffering of HTTP responses
try: # Python 2.7, use buffering of HTTP responses
httplib_response = conn.getresponse(buffering=True)
except TypeError: # Python 2.6 and older
except TypeError: # Python 2.6 and older
httplib_response = conn.getresponse()
except SocketTimeout:
raise ReadTimeoutError(
self, url, "Read timed out. (read timeout=%s)" % read_timeout)
except BaseSSLError as e:
# Catch possible read timeouts thrown as SSL errors. If not the
# case, rethrow the original. We need to do this because of:
# http://bugs.python.org/issue10272
if 'timed out' in str(e) or \
'did not complete (read)' in str(e): # Python 2.6
raise ReadTimeoutError(self, url, "Read timed out.")
raise
except SocketError as e: # Platform-specific: Python 2
# See the above comment about EAGAIN in Python 3. In Python 2 we
# have to specifically catch it and throw the timeout error
if e.errno in _blocking_errnos:
raise ReadTimeoutError(
self, url,
"Read timed out. (read timeout=%s)" % read_timeout)
except (SocketTimeout, BaseSSLError, SocketError) as e:
self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
raise
# AppEngine doesn't have a version attr.
@ -348,8 +385,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
log.debug("\"%s %s %s\" %s %s" % (method, url, http_version,
httplib_response.status,
httplib_response.length))
try:
assert_header_parsing(httplib_response.msg)
except HeaderParsingError as hpe: # Platform-specific: Python 3
log.warning(
'Failed to parse headers (url=%s): %s',
self._absolute_url(url), hpe, exc_info=True)
return httplib_response
def _absolute_url(self, path):
return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
def close(self):
"""
Close all pooled connections and disable the pool.
@ -364,7 +412,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
conn.close()
except Empty:
pass # Done.
pass # Done.
def is_same_host(self, url):
"""
@ -385,7 +433,7 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
return (scheme, host, port) == (self.scheme, self.host, self.port)
def urlopen(self, method, url, body=None, headers=None, retries=3,
def urlopen(self, method, url, body=None, headers=None, retries=None,
redirect=True, assert_same_host=True, timeout=_Default,
pool_timeout=None, release_conn=None, **response_kw):
"""
@ -419,9 +467,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
these headers completely replace any pool-specific headers.
:param retries:
Number of retries to allow before raising a MaxRetryError exception.
If `False`, then retries are disabled and any exception is raised
immediately.
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
Pass ``None`` to retry until you receive a response. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
@ -460,15 +519,15 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if headers is None:
headers = self.headers
if retries < 0 and retries is not False:
raise MaxRetryError(self, url)
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
if release_conn is None:
release_conn = response_kw.get('preload_content', True)
# Check host
if assert_same_host and not self.is_same_host(url):
raise HostChangedError(self, url, retries - 1)
raise HostChangedError(self, url, retries)
conn = None
@ -484,12 +543,19 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
err = None
try:
# Request a connection from the queue
# Request a connection from the queue.
timeout_obj = self._get_timeout(timeout)
conn = self._get_conn(timeout=pool_timeout)
# Make the request on the httplib connection object
conn.timeout = timeout_obj.connect_timeout
is_new_proxy_conn = self.proxy is not None and not getattr(conn, 'sock', None)
if is_new_proxy_conn:
self._prepare_proxy(conn)
# Make the request on the httplib connection object.
httplib_response = self._make_request(conn, method, url,
timeout=timeout,
timeout=timeout_obj,
body=body, headers=headers)
# If we're going to release the connection in ``finally:``, then
@ -514,33 +580,34 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
raise EmptyPoolError(self, "No pool connections are available.")
except (BaseSSLError, CertificateError) as e:
# Release connection unconditionally because there is no way to
# close it externally in case of exception.
# Close the connection. If a connection is reused on which there
# was a Certificate error, the next request will certainly raise
# another Certificate error.
conn = conn and conn.close()
release_conn = True
raise SSLError(e)
except (TimeoutError, HTTPException, SocketError) as e:
if conn:
# Discard the connection for these exceptions. It will be
# be replaced during the next _get_conn() call.
conn.close()
conn = None
except SSLError:
# Treat SSLError separately from BaseSSLError to preserve
# traceback.
conn = conn and conn.close()
release_conn = True
raise
if not retries:
if isinstance(e, TimeoutError):
# TimeoutError is exempt from MaxRetryError-wrapping.
# FIXME: ... Not sure why. Add a reason here.
raise
except (TimeoutError, HTTPException, SocketError, ProtocolError) as e:
# Discard the connection for these exceptions. It will be
# be replaced during the next _get_conn() call.
conn = conn and conn.close()
release_conn = True
# Wrap unexpected exceptions with the most appropriate
# module-level exception and re-raise.
if isinstance(e, SocketError) and self.proxy:
raise ProxyError('Cannot connect to proxy.', e)
if isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
e = ProxyError('Cannot connect to proxy.', e)
elif isinstance(e, (SocketError, HTTPException)):
e = ProtocolError('Connection aborted.', e)
if retries is False:
raise ConnectionError('Connection failed.', e)
raise MaxRetryError(self, url, e)
retries = retries.increment(method, url, error=e, _pool=self,
_stacktrace=sys.exc_info()[2])
retries.sleep()
# Keep track of the error for the retry warning.
err = e
@ -554,23 +621,48 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
if not conn:
# Try again
log.warning("Retrying (%d attempts remain) after connection "
log.warning("Retrying (%r) after connection "
"broken by '%r': %s" % (retries, err, url))
return self.urlopen(method, url, body, headers, retries - 1,
return self.urlopen(method, url, body, headers, retries,
redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
# Handle redirect?
redirect_location = redirect and response.get_redirect_location()
if redirect_location and retries is not False:
if redirect_location:
if response.status == 303:
method = 'GET'
try:
retries = retries.increment(method, url, response=response, _pool=self)
except MaxRetryError:
if retries.raise_on_redirect:
# Release the connection for this response, since we're not
# returning it to be released manually.
response.release_conn()
raise
return response
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, body, headers,
retries - 1, redirect, assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
return self.urlopen(
method, redirect_location, body, headers,
retries=retries, redirect=redirect,
assert_same_host=assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
# Check if we should retry the HTTP response.
if retries.is_forced_retry(method, status_code=response.status):
retries = retries.increment(method, url, response=response, _pool=self)
retries.sleep()
log.info("Forced retry: %s" % url)
return self.urlopen(
method, url, body, headers,
retries=retries, redirect=redirect,
assert_same_host=assert_same_host,
timeout=timeout, pool_timeout=pool_timeout,
release_conn=release_conn, **response_kw)
return response
@ -587,37 +679,39 @@ class HTTPSConnectionPool(HTTPConnectionPool):
``assert_hostname`` and ``host`` in this order to verify connections.
If ``assert_hostname`` is False, no verification is done.
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs`` and
``ssl_version`` are only used if :mod:`ssl` is available and are fed into
:meth:`urllib3.util.ssl_wrap_socket` to upgrade the connection socket
into an SSL socket.
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
``ca_cert_dir``, and ``ssl_version`` are only used if :mod:`ssl` is
available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
the connection socket into an SSL socket.
"""
scheme = 'https'
ConnectionCls = HTTPSConnection
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
strict=False, timeout=Timeout.DEFAULT_TIMEOUT, maxsize=1,
block=False, headers=None, retries=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None,
**conn_kw):
if sys.version_info < (2, 7): # Python 2.6 or older
conn_kw.pop('source_address', None)
ca_cert_dir=None, **conn_kw):
HTTPConnectionPool.__init__(self, host, port, strict, timeout, maxsize,
block, headers, _proxy, _proxy_headers, **conn_kw)
block, headers, retries, _proxy, _proxy_headers,
**conn_kw)
if ca_certs and cert_reqs is None:
cert_reqs = 'CERT_REQUIRED'
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
self.ca_certs = ca_certs
self.ca_cert_dir = ca_cert_dir
self.ssl_version = ssl_version
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.conn_kw = conn_kw
def _prepare_conn(self, conn):
"""
@ -630,24 +724,31 @@ class HTTPSConnectionPool(HTTPConnectionPool):
cert_file=self.cert_file,
cert_reqs=self.cert_reqs,
ca_certs=self.ca_certs,
ca_cert_dir=self.ca_cert_dir,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)
conn.ssl_version = self.ssl_version
conn.conn_kw = self.conn_kw
if self.proxy is not None:
# Python 2.7+
try:
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = conn._set_tunnel
set_tunnel(self.host, self.port, self.proxy_headers)
# Establish tunnel connection early, because otherwise httplib
# would improperly set Host: header to proxy's IP:port.
conn.connect()
return conn
def _prepare_proxy(self, conn):
"""
Establish tunnel connection early, because otherwise httplib
would improperly set Host: header to proxy's IP:port.
"""
# Python 2.7+
try:
set_tunnel = conn.set_tunnel
except AttributeError: # Platform-specific: Python 2.6
set_tunnel = conn._set_tunnel
if sys.version_info <= (2, 6, 4) and not self.proxy_headers: # Python 2.6.4 and older
set_tunnel(self.host, self.port)
else:
set_tunnel(self.host, self.port, self.proxy_headers)
conn.connect()
def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
@ -657,7 +758,6 @@ class HTTPSConnectionPool(HTTPConnectionPool):
% (self.num_connections, self.host))
if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
# Platform-specific: Python without ssl
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
@ -667,21 +767,29 @@ class HTTPSConnectionPool(HTTPConnectionPool):
actual_host = self.proxy.host
actual_port = self.proxy.port
extra_params = {}
if not six.PY3: # Python 2
extra_params['strict'] = self.strict
extra_params.update(self.conn_kw)
conn = self.ConnectionCls(host=actual_host, port=actual_port,
timeout=self.timeout.connect_timeout,
**extra_params)
if self.proxy is not None:
# Enable Nagle's algorithm for proxies, to avoid packet
# fragmentation.
conn.tcp_nodelay = 0
strict=self.strict, **self.conn_kw)
return self._prepare_conn(conn)
def _validate_conn(self, conn):
"""
Called right before a request is made, after the socket is created.
"""
super(HTTPSConnectionPool, self)._validate_conn(conn)
# Force connect early to allow us to validate the connection.
if not getattr(conn, 'sock', None): # AppEngine might not have `.sock`
conn.connect()
if not conn.is_verified:
warnings.warn((
'Unverified HTTPS request is being made. '
'Adding certificate verification is strongly advised. See: '
'https://urllib3.readthedocs.org/en/latest/security.html'),
InsecureRequestWarning)
def connection_from_url(url, **kw):
"""
@ -698,7 +806,7 @@ def connection_from_url(url, **kw):
:class:`.ConnectionPool`. Useful for specifying things like
timeout, maxsize, headers, etc.
Example: ::
Example::
>>> conn = connection_from_url('http://google.com/')
>>> r = conn.request('GET', '/')

View file

@ -0,0 +1,223 @@
from __future__ import absolute_import
import logging
import os
import warnings
from ..exceptions import (
HTTPError,
HTTPWarning,
MaxRetryError,
ProtocolError,
TimeoutError,
SSLError
)
from ..packages.six import BytesIO
from ..request import RequestMethods
from ..response import HTTPResponse
from ..util.timeout import Timeout
from ..util.retry import Retry
try:
from google.appengine.api import urlfetch
except ImportError:
urlfetch = None
log = logging.getLogger(__name__)
class AppEnginePlatformWarning(HTTPWarning):
pass
class AppEnginePlatformError(HTTPError):
pass
class AppEngineManager(RequestMethods):
"""
Connection manager for Google App Engine sandbox applications.
This manager uses the URLFetch service directly instead of using the
emulated httplib, and is subject to URLFetch limitations as described in
the App Engine documentation here:
https://cloud.google.com/appengine/docs/python/urlfetch
Notably it will raise an AppEnginePlatformError if:
* URLFetch is not available.
* If you attempt to use this on GAEv2 (Managed VMs), as full socket
support is available.
* If a request size is more than 10 megabytes.
* If a response size is more than 32 megabtyes.
* If you use an unsupported request method such as OPTIONS.
Beyond those cases, it will raise normal urllib3 errors.
"""
def __init__(self, headers=None, retries=None, validate_certificate=True):
if not urlfetch:
raise AppEnginePlatformError(
"URLFetch is not available in this environment.")
if is_prod_appengine_mvms():
raise AppEnginePlatformError(
"Use normal urllib3.PoolManager instead of AppEngineManager"
"on Managed VMs, as using URLFetch is not necessary in "
"this environment.")
warnings.warn(
"urllib3 is using URLFetch on Google App Engine sandbox instead "
"of sockets. To use sockets directly instead of URLFetch see "
"https://urllib3.readthedocs.org/en/latest/contrib.html.",
AppEnginePlatformWarning)
RequestMethods.__init__(self, headers)
self.validate_certificate = validate_certificate
self.retries = retries or Retry.DEFAULT
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
# Return False to re-raise any potential exceptions
return False
def urlopen(self, method, url, body=None, headers=None,
retries=None, redirect=True, timeout=Timeout.DEFAULT_TIMEOUT,
**response_kw):
retries = self._get_retries(retries, redirect)
try:
response = urlfetch.fetch(
url,
payload=body,
method=method,
headers=headers or {},
allow_truncated=False,
follow_redirects=(
redirect and
retries.redirect != 0 and
retries.total),
deadline=self._get_absolute_timeout(timeout),
validate_certificate=self.validate_certificate,
)
except urlfetch.DeadlineExceededError as e:
raise TimeoutError(self, e)
except urlfetch.InvalidURLError as e:
if 'too large' in str(e):
raise AppEnginePlatformError(
"URLFetch request too large, URLFetch only "
"supports requests up to 10mb in size.", e)
raise ProtocolError(e)
except urlfetch.DownloadError as e:
if 'Too many redirects' in str(e):
raise MaxRetryError(self, url, reason=e)
raise ProtocolError(e)
except urlfetch.ResponseTooLargeError as e:
raise AppEnginePlatformError(
"URLFetch response too large, URLFetch only supports"
"responses up to 32mb in size.", e)
except urlfetch.SSLCertificateError as e:
raise SSLError(e)
except urlfetch.InvalidMethodError as e:
raise AppEnginePlatformError(
"URLFetch does not support method: %s" % method, e)
http_response = self._urlfetch_response_to_http_response(
response, **response_kw)
# Check for redirect response
if (http_response.get_redirect_location() and
retries.raise_on_redirect and redirect):
raise MaxRetryError(self, url, "too many redirects")
# Check if we should retry the HTTP response.
if retries.is_forced_retry(method, status_code=http_response.status):
retries = retries.increment(
method, url, response=http_response, _pool=self)
log.info("Forced retry: %s" % url)
retries.sleep()
return self.urlopen(
method, url,
body=body, headers=headers,
retries=retries, redirect=redirect,
timeout=timeout, **response_kw)
return http_response
def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
if is_prod_appengine():
# Production GAE handles deflate encoding automatically, but does
# not remove the encoding header.
content_encoding = urlfetch_resp.headers.get('content-encoding')
if content_encoding == 'deflate':
del urlfetch_resp.headers['content-encoding']
return HTTPResponse(
# In order for decoding to work, we must present the content as
# a file-like object.
body=BytesIO(urlfetch_resp.content),
headers=urlfetch_resp.headers,
status=urlfetch_resp.status_code,
**response_kw
)
def _get_absolute_timeout(self, timeout):
if timeout is Timeout.DEFAULT_TIMEOUT:
return 5 # 5s is the default timeout for URLFetch.
if isinstance(timeout, Timeout):
if timeout.read is not timeout.connect:
warnings.warn(
"URLFetch does not support granular timeout settings, "
"reverting to total timeout.", AppEnginePlatformWarning)
return timeout.total
return timeout
def _get_retries(self, retries, redirect):
if not isinstance(retries, Retry):
retries = Retry.from_int(
retries, redirect=redirect, default=self.retries)
if retries.connect or retries.read or retries.redirect:
warnings.warn(
"URLFetch only supports total retries and does not "
"recognize connect, read, or redirect retry parameters.",
AppEnginePlatformWarning)
return retries
def is_appengine():
return (is_local_appengine() or
is_prod_appengine() or
is_prod_appengine_mvms())
def is_appengine_sandbox():
return is_appengine() and not is_prod_appengine_mvms()
def is_local_appengine():
return ('APPENGINE_RUNTIME' in os.environ and
'Development/' in os.environ['SERVER_SOFTWARE'])
def is_prod_appengine():
return ('APPENGINE_RUNTIME' in os.environ and
'Google App Engine/' in os.environ['SERVER_SOFTWARE'] and
not is_prod_appengine_mvms())
def is_prod_appengine_mvms():
return os.environ.get('GAE_VM', False) == 'true'

View file

@ -1,14 +1,9 @@
# urllib3/contrib/ntlmpool.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
NTLM authenticating pool, contributed by erikcederstran
Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
"""
from __future__ import absolute_import
try:
from http.client import HTTPSConnection

View file

@ -29,7 +29,7 @@ Now you can use :mod:`urllib3` as you normally would, and it will support SNI
when the required modules are installed.
Activating this module also has the positive side effect of disabling SSL/TLS
encryption in Python 2 (see `CRIME attack`_).
compression in Python 2 (see `CRIME attack`_).
If you want to configure the default list of supported cipher suites, you can
set the ``urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST`` variable.
@ -38,23 +38,25 @@ Module Variables
----------------
:var DEFAULT_SSL_CIPHER_LIST: The list of supported SSL/TLS cipher suites.
Default: ``ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:
ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS``
.. _sni: https://en.wikipedia.org/wiki/Server_Name_Indication
.. _crime attack: https://en.wikipedia.org/wiki/CRIME_(security_exploit)
'''
from __future__ import absolute_import
try:
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
except SyntaxError as e:
raise ImportError(e)
from ndg.httpsclient.ssl_peer_verification import SUBJ_ALT_NAME_SUPPORT
from ndg.httpsclient.subj_alt_name import SubjectAltName as BaseSubjectAltName
import OpenSSL.SSL
from pyasn1.codec.der import decoder as der_decoder
from pyasn1.type import univ, constraint
from socket import _fileobject, timeout
from socket import _fileobject, timeout, error as SocketError
import ssl
import select
from cStringIO import StringIO
from .. import connection
from .. import util
@ -67,33 +69,31 @@ HAS_SNI = SUBJ_ALT_NAME_SUPPORT
# Map from urllib3 to PyOpenSSL compatible parameter-values.
_openssl_versions = {
ssl.PROTOCOL_SSLv23: OpenSSL.SSL.SSLv23_METHOD,
ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD,
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
}
if hasattr(ssl, 'PROTOCOL_TLSv1_1') and hasattr(OpenSSL.SSL, 'TLSv1_1_METHOD'):
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
if hasattr(ssl, 'PROTOCOL_TLSv1_2') and hasattr(OpenSSL.SSL, 'TLSv1_2_METHOD'):
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
try:
_openssl_versions.update({ssl.PROTOCOL_SSLv3: OpenSSL.SSL.SSLv3_METHOD})
except AttributeError:
pass
_openssl_verify = {
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
ssl.CERT_REQUIRED:
OpenSSL.SSL.VERIFY_PEER + OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
# A secure default.
# Sources for more information on TLS ciphers:
#
# - https://wiki.mozilla.org/Security/Server_Side_TLS
# - https://www.ssllabs.com/projects/best-practices/index.html
# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
#
# The general intent is:
# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
# - prefer ECDHE over DHE for better performance,
# - prefer any AES-GCM over any AES-CBC for better performance and security,
# - use 3DES as fallback which is secure but slow,
# - disable NULL authentication, MD5 MACs and DSS for security reasons.
DEFAULT_SSL_CIPHER_LIST = "ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:" + \
"ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:" + \
"!aNULL:!MD5:!DSS"
DEFAULT_SSL_CIPHER_LIST = util.ssl_.DEFAULT_CIPHERS
# OpenSSL will only write 16K at a time
SSL_WRITE_BLOCKSIZE = 16384
orig_util_HAS_SNI = util.HAS_SNI
orig_connection_ssl_wrap_socket = connection.ssl_wrap_socket
@ -113,7 +113,7 @@ def extract_from_urllib3():
util.HAS_SNI = orig_util_HAS_SNI
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
# Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
class SubjectAltName(BaseSubjectAltName):
'''ASN.1 implementation for subjectAltNames support'''
@ -124,7 +124,7 @@ class SubjectAltName(BaseSubjectAltName):
constraint.ValueSizeConstraint(1, 1024)
### Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
# Note: This is a slightly bug-fixed version of same from ndg-httpsclient.
def get_subj_alt_name(peer_cert):
# Search through extensions
dns_name = []
@ -155,205 +155,81 @@ def get_subj_alt_name(peer_cert):
return dns_name
class fileobject(_fileobject):
def _wait_for_sock(self):
rd, wd, ed = select.select([self._sock], [], [],
self._sock.gettimeout())
if not rd:
raise timeout()
def read(self, size=-1):
# Use max, disallow tiny reads in a loop as they are very inefficient.
# We never leave read() with any leftover data from a new recv() call
# in our internal buffer.
rbufsize = max(self._rbufsize, self.default_bufsize)
# Our use of StringIO rather than lists of string objects returned by
# recv() minimizes memory usage and fragmentation that occurs when
# rbufsize is large compared to the typical return value of recv().
buf = self._rbuf
buf.seek(0, 2) # seek end
if size < 0:
# Read until EOF
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or EOF seen, whichever comes first
buf_len = buf.tell()
if buf_len >= size:
# Already have size bytes in our buffer? Extract and return.
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
left = size - buf_len
# recv() will malloc the amount of memory given as its
# parameter even though it often returns much less data
# than that. The returned data string is short lived
# as we copy it into a StringIO and free it. This avoids
# fragmentation issues on many platforms.
try:
data = self._sock.recv(left)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid buffer data copies when:
# - We have no data in our buffer.
# AND
# - Our call to recv returned exactly the
# number of bytes we were asked to read.
return data
if n == left:
buf.write(data)
del data # explicit free
break
assert n <= left, "recv(%d) returned %d bytes" % (left, n)
buf.write(data)
buf_len += n
del data # explicit free
#assert buf_len == buf.tell()
return buf.getvalue()
def readline(self, size=-1):
buf = self._rbuf
buf.seek(0, 2) # seek end
if buf.tell() > 0:
# check if we already have it in our buffer
buf.seek(0)
bline = buf.readline(size)
if bline.endswith('\n') or len(bline) == size:
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return bline
del bline
if size < 0:
# Read until \n or EOF, whichever comes first
if self._rbufsize <= 1:
# Speed up unbuffered case
buf.seek(0)
buffers = [buf.read()]
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
data = None
recv = self._sock.recv
while True:
try:
while data != "\n":
data = recv(1)
if not data:
break
buffers.append(data)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
break
return "".join(buffers)
buf.seek(0, 2) # seek end
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
nl = data.find('\n')
if nl >= 0:
nl += 1
buf.write(data[:nl])
self._rbuf.write(data[nl:])
del data
break
buf.write(data)
return buf.getvalue()
else:
# Read until size bytes or \n or EOF seen, whichever comes first
buf.seek(0, 2) # seek end
buf_len = buf.tell()
if buf_len >= size:
buf.seek(0)
rv = buf.read(size)
self._rbuf = StringIO()
self._rbuf.write(buf.read())
return rv
self._rbuf = StringIO() # reset _rbuf. we consume it via buf.
while True:
try:
data = self._sock.recv(self._rbufsize)
except OpenSSL.SSL.WantReadError:
self._wait_for_sock()
continue
if not data:
break
left = size - buf_len
# did we just receive a newline?
nl = data.find('\n', 0, left)
if nl >= 0:
nl += 1
# save the excess data to _rbuf
self._rbuf.write(data[nl:])
if buf_len:
buf.write(data[:nl])
break
else:
# Shortcut. Avoid data copy through buf when returning
# a substring of our first recv().
return data[:nl]
n = len(data)
if n == size and not buf_len:
# Shortcut. Avoid data copy through buf when
# returning exactly all of our first recv().
return data
if n >= left:
buf.write(data[:left])
self._rbuf.write(data[left:])
break
buf.write(data)
buf_len += n
#assert buf_len == buf.tell()
return buf.getvalue()
class WrappedSocket(object):
'''API-compatibility wrapper for Python OpenSSL's Connection-class.'''
'''API-compatibility wrapper for Python OpenSSL's Connection-class.
def __init__(self, connection, socket):
Note: _makefile_refs, _drop() and _reuse() are needed for the garbage
collector of pypy.
'''
def __init__(self, connection, socket, suppress_ragged_eofs=True):
self.connection = connection
self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs
self._makefile_refs = 0
def fileno(self):
return self.socket.fileno()
def makefile(self, mode, bufsize=-1):
return fileobject(self.connection, mode, bufsize)
self._makefile_refs += 1
return _fileobject(self, mode, bufsize, close=True)
def recv(self, *args, **kwargs):
try:
data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, 'Unexpected EOF'):
return b''
else:
raise SocketError(e)
except OpenSSL.SSL.ZeroReturnError as e:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return b''
else:
raise
except OpenSSL.SSL.WantReadError:
rd, wd, ed = select.select(
[self.socket], [], [], self.socket.gettimeout())
if not rd:
raise timeout('The read operation timed out')
else:
return self.recv(*args, **kwargs)
else:
return data
def settimeout(self, timeout):
return self.socket.settimeout(timeout)
def _send_until_done(self, data):
while True:
try:
return self.connection.send(data)
except OpenSSL.SSL.WantWriteError:
_, wlist, _ = select.select([], [self.socket], [],
self.socket.gettimeout())
if not wlist:
raise timeout()
continue
def sendall(self, data):
return self.connection.sendall(data)
total_sent = 0
while total_sent < len(data):
sent = self._send_until_done(data[total_sent:total_sent + SSL_WRITE_BLOCKSIZE])
total_sent += sent
def shutdown(self):
# FIXME rethrow compatible exceptions should we ever use this
self.connection.shutdown()
def close(self):
return self.connection.shutdown()
if self._makefile_refs < 1:
try:
return self.connection.close()
except OpenSSL.SSL.Error:
return
else:
self._makefile_refs -= 1
def getpeercert(self, binary_form=False):
x509 = self.connection.get_peer_certificate()
@ -376,6 +252,15 @@ class WrappedSocket(object):
]
}
def _reuse(self):
self._makefile_refs += 1
def _drop(self):
if self._makefile_refs < 1:
self.close()
else:
self._makefile_refs -= 1
def _verify_callback(cnx, x509, err_no, err_depth, return_code):
return err_no == 0
@ -383,17 +268,18 @@ def _verify_callback(cnx, x509, err_no, err_depth, return_code):
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None):
ssl_version=None, ca_cert_dir=None):
ctx = OpenSSL.SSL.Context(_openssl_versions[ssl_version])
if certfile:
keyfile = keyfile or certfile # Match behaviour of the normal python ssl library
ctx.use_certificate_file(certfile)
if keyfile:
ctx.use_privatekey_file(keyfile)
if cert_reqs != ssl.CERT_NONE:
ctx.set_verify(_openssl_verify[cert_reqs], _verify_callback)
if ca_certs:
if ca_certs or ca_cert_dir:
try:
ctx.load_verify_locations(ca_certs, None)
ctx.load_verify_locations(ca_certs, ca_cert_dir)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad ca_certs: %r' % ca_certs, e)
else:
@ -413,10 +299,12 @@ def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError:
select.select([sock], [], [])
rd, _, _ = select.select([sock], [], [], sock.gettimeout())
if not rd:
raise timeout('select timed out')
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError('bad handshake', e)
raise ssl.SSLError('bad handshake: %r' % e)
break
return WrappedSocket(cnx, sock)

View file

@ -1,17 +1,17 @@
# urllib3/exceptions.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
# Base Exceptions
## Base Exceptions
class HTTPError(Exception):
"Base exception used by this module."
pass
class HTTPWarning(Warning):
"Base warning used by this module."
pass
class PoolError(HTTPError):
"Base exception for errors caused within a pool."
def __init__(self, pool, message):
@ -44,29 +44,37 @@ class ProxyError(HTTPError):
pass
class ConnectionError(HTTPError):
"Raised when a normal connection fails."
pass
class DecodeError(HTTPError):
"Raised when automatic decoding based on Content-Type fails."
pass
## Leaf Exceptions
class ProtocolError(HTTPError):
"Raised when something unexpected happens mid-request/response."
pass
#: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError
# Leaf Exceptions
class MaxRetryError(RequestError):
"Raised when the maximum number of retries is exceeded."
"""Raised when the maximum number of retries is exceeded.
:param pool: The connection pool
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
:param string url: The requested Url
:param exceptions.Exception reason: The underlying error
"""
def __init__(self, pool, url, reason=None):
self.reason = reason
message = "Max retries exceeded with url: %s" % url
if reason:
message += " (Caused by %s: %s)" % (type(reason), reason)
else:
message += " (Caused by redirect)"
message = "Max retries exceeded with url: %s (Caused by %r)" % (
url, reason)
RequestError.__init__(self, pool, url, message)
@ -106,6 +114,11 @@ class ConnectTimeoutError(TimeoutError):
pass
class NewConnectionError(ConnectTimeoutError, PoolError):
"Raised when we fail to establish a new connection. Usually ECONNREFUSED."
pass
class EmptyPoolError(PoolError):
"Raised when a pool runs out of connections and no more are allowed."
pass
@ -116,7 +129,12 @@ class ClosedPoolError(PoolError):
pass
class LocationParseError(ValueError, HTTPError):
class LocationValueError(ValueError, HTTPError):
"Raised when there is something wrong with a given URL input."
pass
class LocationParseError(LocationValueError):
"Raised when get_host or similar fails to parse the URL input."
def __init__(self, location):
@ -124,3 +142,60 @@ class LocationParseError(ValueError, HTTPError):
HTTPError.__init__(self, message)
self.location = location
class ResponseError(HTTPError):
"Used as a container for an error reason supplied in a MaxRetryError."
GENERIC_ERROR = 'too many error responses'
SPECIFIC_ERROR = 'too many {status_code} error responses'
class SecurityWarning(HTTPWarning):
"Warned when perfoming security reducing actions"
pass
class SubjectAltNameWarning(SecurityWarning):
"Warned when connecting to a host with a certificate missing a SAN."
pass
class InsecureRequestWarning(SecurityWarning):
"Warned when making an unverified HTTPS request."
pass
class SystemTimeWarning(SecurityWarning):
"Warned when system time is suspected to be wrong"
pass
class InsecurePlatformWarning(SecurityWarning):
"Warned when certain SSL configuration is not available on a platform."
pass
class SNIMissingWarning(HTTPWarning):
"Warned when making a HTTPS request without SNI available."
pass
class ResponseNotChunked(ProtocolError, ValueError):
"Response needs to be chunked in order to read it as chunks."
pass
class ProxySchemeUnknown(AssertionError, ValueError):
"ProxyManager does not support the supplied scheme"
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
def __init__(self, scheme):
message = "Not supported proxy scheme %s" % scheme
super(ProxySchemeUnknown, self).__init__(message)
class HeaderParsingError(HTTPError):
"Raised by assert_header_parsing, but we convert it to a log.warning statement."
def __init__(self, defects, unparsed_data):
message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
super(HeaderParsingError, self).__init__(message)

View file

@ -1,9 +1,4 @@
# urllib3/fields.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
import email.utils
import mimetypes
@ -78,9 +73,10 @@ class RequestField(object):
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
Supports constructing :class:`~urllib3.fields.RequestField` from parameter
of key/value strings AND key/filetuple. A filetuple is a (filename, data, MIME type)
tuple where the MIME type is optional. For example: ::
Supports constructing :class:`~urllib3.fields.RequestField` from
parameter of key/value strings AND key/filetuple. A filetuple is a
(filename, data, MIME type) tuple where the MIME type is optional.
For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
@ -125,8 +121,8 @@ class RequestField(object):
'Content-Disposition' fields.
:param header_parts:
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format as
`k1="v1"; k2="v2"; ...`.
A sequence of (k, v) typles or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`.
"""
parts = []
iterable = header_parts
@ -158,7 +154,8 @@ class RequestField(object):
lines.append('\r\n')
return '\r\n'.join(lines)
def make_multipart(self, content_disposition=None, content_type=None, content_location=None):
def make_multipart(self, content_disposition=None, content_type=None,
content_location=None):
"""
Makes this request field into a multipart request field.
@ -172,6 +169,10 @@ class RequestField(object):
"""
self.headers['Content-Disposition'] = content_disposition or 'form-data'
self.headers['Content-Disposition'] += '; '.join(['', self._render_parts((('name', self._name), ('filename', self._filename)))])
self.headers['Content-Disposition'] += '; '.join([
'', self._render_parts(
(('name', self._name), ('filename', self._filename))
)
])
self.headers['Content-Type'] = content_type
self.headers['Content-Location'] = content_location

View file

@ -1,11 +1,5 @@
# urllib3/filepost.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
import codecs
import mimetypes
from uuid import uuid4
from io import BytesIO
@ -38,10 +32,10 @@ def iter_field_objects(fields):
i = iter(fields)
for field in i:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def iter_fields(fields):

View file

@ -2,3 +2,4 @@ from __future__ import absolute_import
from . import ssl_match_hostname
__all__ = ('ssl_match_hostname', )

View file

@ -2,7 +2,6 @@
# Passes Python2.7's test suite and incorporates all the latest updates.
# Copyright 2009 Raymond Hettinger, released under the MIT License.
# http://code.activestate.com/recipes/576693/
try:
from thread import get_ident as _get_ident
except ImportError:

View file

@ -1,9 +1,4 @@
# urllib3/poolmanager.py
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
import logging
try: # Python 3
@ -14,8 +9,10 @@ except ImportError:
from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import port_by_scheme
from .exceptions import LocationValueError, MaxRetryError, ProxySchemeUnknown
from .request import RequestMethods
from .util import parse_url
from .util.url import parse_url
from .util.retry import Retry
__all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
@ -29,7 +26,7 @@ pool_classes_by_scheme = {
log = logging.getLogger(__name__)
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
'ssl_version')
'ssl_version', 'ca_cert_dir')
class PoolManager(RequestMethods):
@ -49,7 +46,7 @@ class PoolManager(RequestMethods):
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
Example: ::
Example::
>>> manager = PoolManager(num_pools=2)
>>> r = manager.request('GET', 'http://google.com/')
@ -68,6 +65,14 @@ class PoolManager(RequestMethods):
self.pools = RecentlyUsedContainer(num_pools,
dispose_func=lambda p: p.close())
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.clear()
# Return False to re-raise any potential exceptions
return False
def _new_pool(self, scheme, host, port):
"""
Create a new :class:`ConnectionPool` based on host, port and scheme.
@ -102,10 +107,11 @@ class PoolManager(RequestMethods):
``urllib3.connectionpool.port_by_scheme``.
"""
if not host:
raise LocationValueError("No host specified.")
scheme = scheme or 'http'
port = port or port_by_scheme.get(scheme, 80)
pool_key = (scheme, host, port)
with self.pools.lock:
@ -118,6 +124,7 @@ class PoolManager(RequestMethods):
# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
return pool
def connection_from_url(self, url):
@ -161,13 +168,25 @@ class PoolManager(RequestMethods):
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
# RFC 2616, Section 10.3.4
# RFC 7231, Section 6.4.4
if response.status == 303:
method = 'GET'
log.info("Redirecting %s -> %s" % (url, redirect_location))
kw['retries'] = kw.get('retries', 3) - 1 # Persist retries countdown
retries = kw.get('retries')
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect)
try:
retries = retries.increment(method, url, response=response, _pool=conn)
except MaxRetryError:
if retries.raise_on_redirect:
raise
return response
kw['retries'] = retries
kw['redirect'] = redirect
log.info("Redirecting %s -> %s" % (url, redirect_location))
return self.urlopen(method, redirect_location, **kw)
@ -208,12 +227,16 @@ class ProxyManager(PoolManager):
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
assert self.proxy.scheme in ("http", "https"), \
'Not supported proxy scheme %s' % self.proxy.scheme
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)
@ -248,10 +271,10 @@ class ProxyManager(PoolManager):
# For proxied HTTPS requests, httplib sets the necessary headers
# on the CONNECT to the proxy. For HTTP, we'll definitely
# need to set 'Host' at the very least.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
headers = kw.get('headers', self.headers)
kw['headers'] = self._set_proxy_headers(url, headers)
return super(ProxyManager, self).urlopen(method, url, redirect, **kw)
return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url, **kw):

View file

@ -1,9 +1,4 @@
# urllib3/request.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
try:
from urllib.parse import urlencode
except ImportError:
@ -26,8 +21,8 @@ class RequestMethods(object):
Specifically,
:meth:`.request_encode_url` is for sending requests whose fields are encoded
in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_url` is for sending requests whose fields are
encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
@ -51,7 +46,7 @@ class RequestMethods(object):
def urlopen(self, method, url, body=None, headers=None,
encode_multipart=True, multipart_boundary=None,
**kw): # Abstract
**kw): # Abstract
raise NotImplemented("Classes extending RequestMethods must implement "
"their own ``urlopen`` method.")
@ -61,8 +56,8 @@ class RequestMethods(object):
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
effort. It can be used in most situations, while still having the option
to drop down to more specific methods when necessary, such as
effort. It can be used in most situations, while still having the
option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
"""
@ -70,21 +65,29 @@ class RequestMethods(object):
if method in self._encode_url_methods:
return self.request_encode_url(method, url, fields=fields,
headers=headers,
**urlopen_kw)
headers=headers,
**urlopen_kw)
else:
return self.request_encode_body(method, url, fields=fields,
headers=headers,
**urlopen_kw)
headers=headers,
**urlopen_kw)
def request_encode_url(self, method, url, fields=None, **urlopen_kw):
def request_encode_url(self, method, url, fields=None, headers=None,
**urlopen_kw):
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
"""
if headers is None:
headers = self.headers
extra_kw = {'headers': headers}
extra_kw.update(urlopen_kw)
if fields:
url += '?' + urlencode(fields)
return self.urlopen(method, url, **urlopen_kw)
return self.urlopen(method, url, **extra_kw)
def request_encode_body(self, method, url, fields=None, headers=None,
encode_multipart=True, multipart_boundary=None,
@ -94,18 +97,18 @@ class RequestMethods(object):
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode the
payload with the appropriate content type. Otherwise
:meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
the payload with the appropriate content type. Otherwise
:meth:`urllib.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it in other times too. However, it may break request signing,
such as with OAuth.
safe to use it in other times too. However, it may break request
signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
the MIME type is optional. For example: ::
the MIME type is optional. For example::
fields = {
'foo': 'bar',
@ -119,23 +122,30 @@ class RequestMethods(object):
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimick behavior of browsers.
Note that if ``headers`` are supplied, the 'Content-Type' header will be
overwritten because it depends on the dynamic random boundary string
Note that if ``headers`` are supplied, the 'Content-Type' header will
be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
"""
if encode_multipart:
body, content_type = encode_multipart_formdata(fields or {},
boundary=multipart_boundary)
else:
body, content_type = (urlencode(fields or {}),
'application/x-www-form-urlencoded')
if headers is None:
headers = self.headers
headers_ = {'Content-Type': content_type}
headers_.update(headers)
extra_kw = {'headers': {}}
return self.urlopen(method, url, body=body, headers=headers_,
**urlopen_kw)
if fields:
if 'body' in urlopen_kw:
raise TypeError(
"request got values for both 'fields' and 'body', can only specify one.")
if encode_multipart:
body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
else:
body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'
extra_kw['body'] = body
extra_kw['headers'] = {'Content-Type': content_type}
extra_kw['headers'].update(headers)
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)

View file

@ -1,21 +1,18 @@
# urllib3/response.py
# Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
import logging
from __future__ import absolute_import
from contextlib import contextmanager
import zlib
import io
from socket import timeout as SocketTimeout
from socket import error as SocketError
from ._collections import HTTPHeaderDict
from .exceptions import DecodeError
from .packages.six import string_types as basestring, binary_type
from .util import is_fp_closed
log = logging.getLogger(__name__)
from .exceptions import (
ProtocolError, DecodeError, ReadTimeoutError, ResponseNotChunked
)
from .packages.six import string_types as basestring, binary_type, PY3
from .packages.six.moves import http_client as httplib
from .connection import HTTPException, BaseSSLError
from .util.response import is_fp_closed, is_response_to_head
class DeflateDecoder(object):
@ -29,6 +26,9 @@ class DeflateDecoder(object):
return getattr(self._obj, name)
def decompress(self, data):
if not data:
return data
if not self._first_try:
return self._obj.decompress(data)
@ -44,9 +44,23 @@ class DeflateDecoder(object):
self._data = None
class GzipDecoder(object):
def __init__(self):
self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
def __getattr__(self, name):
return getattr(self._obj, name)
def decompress(self, data):
if not data:
return data
return self._obj.decompress(data)
def _get_decoder(mode):
if mode == 'gzip':
return zlib.decompressobj(16 + zlib.MAX_WBITS)
return GzipDecoder()
return DeflateDecoder()
@ -56,7 +70,10 @@ class HTTPResponse(io.IOBase):
HTTP Response container.
Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
loaded and decoded on-demand when the ``data`` property is accessed.
loaded and decoded on-demand when the ``data`` property is accessed. This
class is also compatible with the Python standard library's :mod:`io`
module, and can hence be treated as a readable object in the context of that
framework.
Extra parameters for behaviour not present in httplib.HTTPResponse:
@ -81,9 +98,10 @@ class HTTPResponse(io.IOBase):
strict=0, preload_content=True, decode_content=True,
original_response=None, pool=None, connection=None):
self.headers = HTTPHeaderDict()
if headers:
self.headers.update(headers)
if isinstance(headers, HTTPHeaderDict):
self.headers = headers
else:
self.headers = HTTPHeaderDict(headers)
self.status = status
self.version = version
self.reason = reason
@ -91,17 +109,30 @@ class HTTPResponse(io.IOBase):
self.decode_content = decode_content
self._decoder = None
self._body = body if body and isinstance(body, basestring) else None
self._body = None
self._fp = None
self._original_response = original_response
self._fp_bytes_read = 0
if body and isinstance(body, (basestring, binary_type)):
self._body = body
self._pool = pool
self._connection = connection
if hasattr(body, 'read'):
self._fp = body
# Are we using the chunked-style of transfer encoding?
self.chunked = False
self.chunk_left = None
tr_enc = self.headers.get('transfer-encoding', '').lower()
# Don't incur the penalty of creating a list and then discarding it
encodings = (enc.strip() for enc in tr_enc.split(","))
if "chunked" in encodings:
self.chunked = True
# If requested, preload the body.
if preload_content and not self._body:
self._body = self.read(decode_content=decode_content)
@ -142,6 +173,93 @@ class HTTPResponse(io.IOBase):
"""
return self._fp_bytes_read
def _init_decoder(self):
"""
Set-up the _decoder attribute if necessar.
"""
# Note: content-encoding value should be case-insensitive, per RFC 7230
# Section 3.2
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None and content_encoding in self.CONTENT_DECODERS:
self._decoder = _get_decoder(content_encoding)
def _decode(self, data, decode_content, flush_decoder):
"""
Decode the data passed in and potentially flush the decoder.
"""
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
except (IOError, zlib.error) as e:
content_encoding = self.headers.get('content-encoding', '').lower()
raise DecodeError(
"Received response with content-encoding: %s, but "
"failed to decode it." % content_encoding, e)
if flush_decoder and decode_content:
data += self._flush_decoder()
return data
def _flush_decoder(self):
"""
Flushes the decoder. Should only be called if the decoder is actually
being used.
"""
if self._decoder:
buf = self._decoder.decompress(b'')
return buf + self._decoder.flush()
return b''
@contextmanager
def _error_catcher(self):
"""
Catch low-level python exceptions, instead re-raising urllib3
variants, so that low-level exceptions are not leaked in the
high-level api.
On exit, release the connection back to the pool.
"""
try:
try:
yield
except SocketTimeout:
# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
# there is yet no clean way to get at it from this context.
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
except BaseSSLError as e:
# FIXME: Is there a better way to differentiate between SSLErrors?
if 'read operation timed out' not in str(e): # Defensive:
# This shouldn't happen but just in case we're missing an edge
# case, let's avoid swallowing SSL errors.
raise
raise ReadTimeoutError(self._pool, None, 'Read timed out.')
except (HTTPException, SocketError) as e:
# This includes IncompleteRead.
raise ProtocolError('Connection broken: %r' % e, e)
except Exception:
# The response may not be closed but we're not going to use it anymore
# so close it now to ensure that the connection is released back to the pool.
if self._original_response and not self._original_response.isclosed():
self._original_response.close()
# Closing the response may not actually be sufficient to close
# everything, so if we have a hold of the connection close that
# too.
if self._connection is not None:
self._connection.close()
raise
finally:
if self._original_response and self._original_response.isclosed():
self.release_conn()
def read(self, amt=None, decode_content=None, cache_content=False):
"""
Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
@ -163,12 +281,7 @@ class HTTPResponse(io.IOBase):
after having ``.read()`` the file object. (Overridden if ``amt`` is
set.)
"""
# Note: content-encoding value should be case-insensitive, per RFC 2616
# Section 3.5
content_encoding = self.headers.get('content-encoding', '').lower()
if self._decoder is None:
if content_encoding in self.CONTENT_DECODERS:
self._decoder = _get_decoder(content_encoding)
self._init_decoder()
if decode_content is None:
decode_content = self.decode_content
@ -176,8 +289,9 @@ class HTTPResponse(io.IOBase):
return
flush_decoder = False
data = None
try:
with self._error_catcher():
if amt is None:
# cStringIO doesn't like amt=None
data = self._fp.read()
@ -190,35 +304,21 @@ class HTTPResponse(io.IOBase):
#
# This is redundant to what httplib/http.client _should_
# already do. However, versions of python released before
# December 15, 2012 (http://bugs.python.org/issue16298) do not
# properly close the connection in all cases. There is no harm
# in redundantly calling close.
# December 15, 2012 (http://bugs.python.org/issue16298) do
# not properly close the connection in all cases. There is
# no harm in redundantly calling close.
self._fp.close()
flush_decoder = True
if data:
self._fp_bytes_read += len(data)
try:
if decode_content and self._decoder:
data = self._decoder.decompress(data)
except (IOError, zlib.error) as e:
raise DecodeError(
"Received response with content-encoding: %s, but "
"failed to decode it." % content_encoding,
e)
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
data += buf + self._decoder.flush()
data = self._decode(data, decode_content, flush_decoder)
if cache_content:
self._body = data
return data
finally:
if self._original_response and self._original_response.isclosed():
self.release_conn()
return data
def stream(self, amt=2**16, decode_content=None):
"""
@ -236,12 +336,15 @@ class HTTPResponse(io.IOBase):
If True, will attempt to decode the body based on the
'content-encoding' header.
"""
while not is_fp_closed(self._fp):
data = self.read(amt=amt, decode_content=decode_content)
if data:
yield data
if self.chunked:
for line in self.read_chunked(amt, decode_content=decode_content):
yield line
else:
while not is_fp_closed(self._fp):
data = self.read(amt=amt, decode_content=decode_content)
if data:
yield data
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
@ -252,14 +355,17 @@ class HTTPResponse(io.IOBase):
Remaining parameters are passed to the HTTPResponse constructor, along
with ``original_response=r``.
"""
headers = r.msg
headers = HTTPHeaderDict()
for k, v in r.getheaders():
headers.add(k, v)
if not isinstance(headers, HTTPHeaderDict):
if PY3: # Python 3
headers = HTTPHeaderDict(headers.items())
else: # Python 2
headers = HTTPHeaderDict.from_httplib(headers)
# HTTPResponse objects in Python 3 don't have a .strict attribute
strict = getattr(r, 'strict', 0)
return ResponseCls(body=r,
resp = ResponseCls(body=r,
headers=headers,
status=r.status,
version=r.version,
@ -267,6 +373,7 @@ class HTTPResponse(io.IOBase):
strict=strict,
original_response=r,
**response_kw)
return resp
# Backwards-compatibility methods for httplib.HTTPResponse
def getheaders(self):
@ -297,7 +404,7 @@ class HTTPResponse(io.IOBase):
elif hasattr(self._fp, "fileno"):
return self._fp.fileno()
else:
raise IOError("The file-like object this HTTPResponse is wrapped "
raise IOError("The file-like object this HTTPResponse is wrapped "
"around has no file descriptor")
def flush(self):
@ -305,4 +412,103 @@ class HTTPResponse(io.IOBase):
return self._fp.flush()
def readable(self):
# This method is required for `io` module compatibility.
return True
def readinto(self, b):
# This method is required for `io` module compatibility.
temp = self.read(len(b))
if len(temp) == 0:
return 0
else:
b[:len(temp)] = temp
return len(temp)
def _update_chunk_length(self):
# First, we'll figure out length of a chunk and then
# we'll try to read it from socket.
if self.chunk_left is not None:
return
line = self._fp.fp.readline()
line = line.split(b';', 1)[0]
try:
self.chunk_left = int(line, 16)
except ValueError:
# Invalid chunked protocol response, abort.
self.close()
raise httplib.IncompleteRead(line)
def _handle_chunk(self, amt):
returned_chunk = None
if amt is None:
chunk = self._fp._safe_read(self.chunk_left)
returned_chunk = chunk
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
self.chunk_left = None
elif amt < self.chunk_left:
value = self._fp._safe_read(amt)
self.chunk_left = self.chunk_left - amt
returned_chunk = value
elif amt == self.chunk_left:
value = self._fp._safe_read(amt)
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
self.chunk_left = None
returned_chunk = value
else: # amt > self.chunk_left
returned_chunk = self._fp._safe_read(self.chunk_left)
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
self.chunk_left = None
return returned_chunk
def read_chunked(self, amt=None, decode_content=None):
"""
Similar to :meth:`HTTPResponse.read`, but with an additional
parameter: ``decode_content``.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
"""
self._init_decoder()
# FIXME: Rewrite this method and make it a class with a better structured logic.
if not self.chunked:
raise ResponseNotChunked(
"Response is not chunked. "
"Header 'transfer-encoding: chunked' is missing.")
# Don't bother reading the body of a HEAD request.
if self._original_response and is_response_to_head(self._original_response):
self._original_response.close()
return
with self._error_catcher():
while True:
self._update_chunk_length()
if self.chunk_left == 0:
break
chunk = self._handle_chunk(amt)
decoded = self._decode(chunk, decode_content=decode_content,
flush_decoder=False)
if decoded:
yield decoded
if decode_content:
# On CPython and PyPy, we should never need to flush the
# decoder. However, on Jython we *might* need to, so
# lets defensively do it anyway.
decoded = self._flush_decoder()
if decoded: # Platform-specific: Jython.
yield decoded
# Chunk content ends with \r\n: discard it.
while True:
line = self._fp.fp.readline()
if not line:
# Some sites may not end with '\r\n'.
break
if line == b'\r\n':
break
# We read everything; close the "file".
if self._original_response:
self._original_response.close()

View file

@ -1,9 +1,5 @@
# urllib3/util/__init__.py
# Copyright 2008-2014 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
#
# This module is part of urllib3 and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
from __future__ import absolute_import
# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import make_headers
from .response import is_fp_closed
@ -19,9 +15,30 @@ from .timeout import (
current_time,
Timeout,
)
from .retry import Retry
from .url import (
get_host,
parse_url,
split_first,
Url,
)
__all__ = (
'HAS_SNI',
'SSLContext',
'Retry',
'Timeout',
'Url',
'assert_fingerprint',
'current_time',
'is_connection_dropped',
'is_fp_closed',
'get_host',
'parse_url',
'make_headers',
'resolve_cert_reqs',
'resolve_ssl_version',
'split_first',
'ssl_wrap_socket',
)

View file

@ -1,4 +1,5 @@
from socket import error as SocketError
from __future__ import absolute_import
import socket
try:
from select import poll, POLLIN
except ImportError: # `poll` doesn't exist on OSX and other platforms
@ -8,6 +9,7 @@ except ImportError: # `poll` doesn't exist on OSX and other platforms
except ImportError: # `select` doesn't exist on AppEngine.
select = False
def is_connection_dropped(conn): # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
@ -22,7 +24,7 @@ def is_connection_dropped(conn): # Platform-specific
if sock is False: # Platform-specific: AppEngine
return False
if sock is None: # Connection already closed (such as by httplib).
return False
return True
if not poll:
if not select: # Platform-specific: AppEngine
@ -30,7 +32,7 @@ def is_connection_dropped(conn): # Platform-specific
try:
return select([sock], [], [], 0.0)[0]
except SocketError:
except socket.error:
return True
# This version is better on platforms that support it.
@ -42,4 +44,58 @@ def is_connection_dropped(conn): # Platform-specific
return True
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None, socket_options=None):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith('['):
host = host.strip('[]')
err = None
for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
# This is the only addition urllib3 makes to this function.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
return sock
except socket.error as e:
err = e
if sock is not None:
sock.close()
sock = None
if err is not None:
raise err
raise socket.error("getaddrinfo returns an empty list")
def _set_socket_options(sock, options):
if options is None:
return
for opt in options:
sock.setsockopt(*opt)

View file

@ -1,13 +1,13 @@
from __future__ import absolute_import
from base64 import b64encode
from ..packages import six
from ..packages.six import b
ACCEPT_ENCODING = 'gzip,deflate'
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
basic_auth=None, proxy_basic_auth=None):
basic_auth=None, proxy_basic_auth=None, disable_cache=None):
"""
Shortcuts for generating request headers.
@ -32,7 +32,10 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
Example: ::
:param disable_cache:
If ``True``, adds 'cache-control: no-cache' header.
Example::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
@ -57,12 +60,13 @@ def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
if basic_auth:
headers['authorization'] = 'Basic ' + \
b64encode(six.b(basic_auth)).decode('utf-8')
b64encode(b(basic_auth)).decode('utf-8')
if proxy_basic_auth:
headers['proxy-authorization'] = 'Basic ' + \
b64encode(six.b(proxy_basic_auth)).decode('utf-8')
b64encode(b(proxy_basic_auth)).decode('utf-8')
if disable_cache:
headers['cache-control'] = 'no-cache'
return headers

View file

@ -1,3 +1,9 @@
from __future__ import absolute_import
from ..packages.six.moves import http_client as httplib
from ..exceptions import HeaderParsingError
def is_fp_closed(obj):
"""
Checks whether a given file-like object is closed.
@ -5,9 +11,64 @@ def is_fp_closed(obj):
:param obj:
The file-like object to check.
"""
if hasattr(obj, 'fp'):
# Object is a container for another file-like object that gets released
# on exhaustion (e.g. HTTPResponse)
return obj.fp is None
return obj.closed
try:
# Check via the official file-like-object way.
return obj.closed
except AttributeError:
pass
try:
# Check if the object is a container for another file-like object that
# gets released on exhaustion (e.g. HTTPResponse).
return obj.fp is None
except AttributeError:
pass
raise ValueError("Unable to determine whether fp is closed.")
def assert_header_parsing(headers):
"""
Asserts whether all headers have been successfully parsed.
Extracts encountered errors from the result of parsing headers.
Only works on Python 3.
:param headers: Headers to verify.
:type headers: `httplib.HTTPMessage`.
:raises urllib3.exceptions.HeaderParsingError:
If parsing errors are found.
"""
# This will fail silently if we pass in the wrong kind of parameter.
# To make debugging easier add an explicit check.
if not isinstance(headers, httplib.HTTPMessage):
raise TypeError('expected httplib.Message, got {0}.'.format(
type(headers)))
defects = getattr(headers, 'defects', None)
get_payload = getattr(headers, 'get_payload', None)
unparsed_data = None
if get_payload: # Platform-specific: Python 3.
unparsed_data = get_payload()
if defects or unparsed_data:
raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
def is_response_to_head(response):
"""
Checks, wether a the request of a response has been a HEAD-request.
Handles the quirks of AppEngine.
:param conn:
:type conn: :class:`httplib.HTTPResponse`
"""
# FIXME: Can we do this somehow without accessing private httplib _method?
method = response._method
if isinstance(method, int): # Platform-specific: Appengine
return method == 3
return method.upper() == 'HEAD'

View file

@ -0,0 +1,286 @@
from __future__ import absolute_import
import time
import logging
from ..exceptions import (
ConnectTimeoutError,
MaxRetryError,
ProtocolError,
ReadTimeoutError,
ResponseError,
)
from ..packages import six
log = logging.getLogger(__name__)
class Retry(object):
""" Retry configuration.
Each retry attempt will create a new Retry object with updated values, so
they can be safely reused.
Retries can be defined as a default for a pool::
retries = Retry(connect=5, read=2, redirect=5)
http = PoolManager(retries=retries)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', retries=Retry(10))
Retries can be disabled by passing ``False``::
response = http.request('GET', 'http://example.com/', retries=False)
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
retries are disabled, in which case the causing exception will be raised.
:param int total:
Total number of retries to allow. Takes precedence over other counts.
Set to ``None`` to remove this constraint and fall back on other
counts. It's a good idea to set this to some sensibly-high value to
account for unexpected edge cases and avoid infinite retry loops.
Set to ``0`` to fail on the first retry.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int connect:
How many connection-related errors to retry on.
These are errors raised before the request is sent to the remote server,
which we assume has not triggered the server to process the request.
Set to ``0`` to fail on the first retry of this type.
:param int read:
How many times to retry on read errors.
These errors are raised after the request was sent to the server, so the
request may have side-effects.
Set to ``0`` to fail on the first retry of this type.
:param int redirect:
How many redirects to perform. Limit this to avoid infinite redirect
loops.
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
308.
Set to ``0`` to fail on the first retry of this type.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param iterable method_whitelist:
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
indempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
:param iterable status_forcelist:
A set of HTTP status codes that we should force a retry on.
By default, this is disabled with ``None``.
:param float backoff_factor:
A backoff factor to apply between attempts. urllib3 will sleep for::
{backoff factor} * (2 ^ ({number of total retries} - 1))
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
for [0.1s, 0.2s, 0.4s, ...] between retries. It will never be longer
than :attr:`Retry.BACKOFF_MAX`.
By default, backoff is disabled (set to 0).
:param bool raise_on_redirect: Whether, if the number of redirects is
exhausted, to raise a MaxRetryError, or to return a response with a
response code in the 3xx range.
"""
DEFAULT_METHOD_WHITELIST = frozenset([
'HEAD', 'GET', 'PUT', 'DELETE', 'OPTIONS', 'TRACE'])
#: Maximum backoff time.
BACKOFF_MAX = 120
def __init__(self, total=10, connect=None, read=None, redirect=None,
method_whitelist=DEFAULT_METHOD_WHITELIST, status_forcelist=None,
backoff_factor=0, raise_on_redirect=True, _observed_errors=0):
self.total = total
self.connect = connect
self.read = read
if redirect is False or total is False:
redirect = 0
raise_on_redirect = False
self.redirect = redirect
self.status_forcelist = status_forcelist or set()
self.method_whitelist = method_whitelist
self.backoff_factor = backoff_factor
self.raise_on_redirect = raise_on_redirect
self._observed_errors = _observed_errors # TODO: use .history instead?
def new(self, **kw):
params = dict(
total=self.total,
connect=self.connect, read=self.read, redirect=self.redirect,
method_whitelist=self.method_whitelist,
status_forcelist=self.status_forcelist,
backoff_factor=self.backoff_factor,
raise_on_redirect=self.raise_on_redirect,
_observed_errors=self._observed_errors,
)
params.update(kw)
return type(self)(**params)
@classmethod
def from_int(cls, retries, redirect=True, default=None):
""" Backwards-compatibility for the old retries format."""
if retries is None:
retries = default if default is not None else cls.DEFAULT
if isinstance(retries, Retry):
return retries
redirect = bool(redirect) and None
new_retries = cls(retries, redirect=redirect)
log.debug("Converted retries value: %r -> %r" % (retries, new_retries))
return new_retries
def get_backoff_time(self):
""" Formula for computing the current backoff
:rtype: float
"""
if self._observed_errors <= 1:
return 0
backoff_value = self.backoff_factor * (2 ** (self._observed_errors - 1))
return min(self.BACKOFF_MAX, backoff_value)
def sleep(self):
""" Sleep between retry attempts using an exponential backoff.
By default, the backoff factor is 0 and this method will return
immediately.
"""
backoff = self.get_backoff_time()
if backoff <= 0:
return
time.sleep(backoff)
def _is_connection_error(self, err):
""" Errors when we're fairly sure that the server did not receive the
request, so it should be safe to retry.
"""
return isinstance(err, ConnectTimeoutError)
def _is_read_error(self, err):
""" Errors that occur after the request has been started, so we should
assume that the server began processing it.
"""
return isinstance(err, (ReadTimeoutError, ProtocolError))
def is_forced_retry(self, method, status_code):
""" Is this method/status code retryable? (Based on method/codes whitelists)
"""
if self.method_whitelist and method.upper() not in self.method_whitelist:
return False
return self.status_forcelist and status_code in self.status_forcelist
def is_exhausted(self):
""" Are we out of retries? """
retry_counts = (self.total, self.connect, self.read, self.redirect)
retry_counts = list(filter(None, retry_counts))
if not retry_counts:
return False
return min(retry_counts) < 0
def increment(self, method=None, url=None, response=None, error=None,
_pool=None, _stacktrace=None):
""" Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.HTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise six.reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
_observed_errors = self._observed_errors
connect = self.connect
read = self.read
redirect = self.redirect
cause = 'unknown'
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise six.reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
_observed_errors += 1
elif error and self._is_read_error(error):
# Read retry?
if read is False:
raise six.reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
_observed_errors += 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
cause = 'too many redirects'
else:
# Incrementing because of a server error like a 500 in
# status_forcelist and a the given method is in the whitelist
_observed_errors += 1
cause = ResponseError.GENERIC_ERROR
if response and response.status:
cause = ResponseError.SPECIFIC_ERROR.format(
status_code=response.status)
new_retry = self.new(
total=total,
connect=connect, read=read, redirect=redirect,
_observed_errors=_observed_errors)
if new_retry.is_exhausted():
raise MaxRetryError(_pool, url, error or ResponseError(cause))
log.debug("Incremented Retry for (url='%s'): %r" % (url, new_retry))
return new_retry
def __repr__(self):
return ('{cls.__name__}(total={self.total}, connect={self.connect}, '
'read={self.read}, redirect={self.redirect})').format(
cls=type(self), self=self)
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)

View file

@ -1,21 +1,137 @@
from binascii import hexlify, unhexlify
from hashlib import md5, sha1
from __future__ import absolute_import
import errno
import warnings
import hmac
from ..exceptions import SSLError
from binascii import hexlify, unhexlify
from hashlib import md5, sha1, sha256
from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning
SSLContext = None
HAS_SNI = False
create_default_context = None
# Maps the length of a digest to a possible hash function producing this digest
HASHFUNC_MAP = {
32: md5,
40: sha1,
64: sha256,
}
def _const_compare_digest_backport(a, b):
"""
Compare two digests of equal length in constant time.
The digests must be of type str/bytes.
Returns True if the digests match, and False otherwise.
"""
result = abs(len(a) - len(b))
for l, r in zip(bytearray(a), bytearray(b)):
result |= l ^ r
return result == 0
_const_compare_digest = getattr(hmac, 'compare_digest',
_const_compare_digest_backport)
try: # Test for SSL features
SSLContext = None
HAS_SNI = False
import ssl
from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
from ssl import SSLContext # Modern SSL?
from ssl import HAS_SNI # Has SNI?
except ImportError:
pass
try:
from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
except ImportError:
OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
OP_NO_COMPRESSION = 0x20000
# A secure default.
# Sources for more information on TLS ciphers:
#
# - https://wiki.mozilla.org/Security/Server_Side_TLS
# - https://www.ssllabs.com/projects/best-practices/index.html
# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
#
# The general intent is:
# - Prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
# - prefer ECDHE over DHE for better performance,
# - prefer any AES-GCM over any AES-CBC for better performance and security,
# - use 3DES as fallback which is secure but slow,
# - disable NULL authentication, MD5 MACs and DSS for security reasons.
DEFAULT_CIPHERS = (
'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:!aNULL:'
'!eNULL:!MD5'
)
try:
from ssl import SSLContext # Modern SSL?
except ImportError:
import sys
class SSLContext(object): # Platform-specific: Python 2 & 3.1
supports_set_ciphers = ((2, 7) <= sys.version_info < (3,) or
(3, 2) <= sys.version_info)
def __init__(self, protocol_version):
self.protocol = protocol_version
# Use default values from a real SSLContext
self.check_hostname = False
self.verify_mode = ssl.CERT_NONE
self.ca_certs = None
self.options = 0
self.certfile = None
self.keyfile = None
self.ciphers = None
def load_cert_chain(self, certfile, keyfile):
self.certfile = certfile
self.keyfile = keyfile
def load_verify_locations(self, cafile=None, capath=None):
self.ca_certs = cafile
if capath is not None:
raise SSLError("CA directories not supported in older Pythons")
def set_ciphers(self, cipher_suite):
if not self.supports_set_ciphers:
raise TypeError(
'Your version of Python does not support setting '
'a custom cipher suite. Please upgrade to Python '
'2.7, 3.2, or later if you need this functionality.'
)
self.ciphers = cipher_suite
def wrap_socket(self, socket, server_hostname=None):
warnings.warn(
'A true SSLContext object is not available. This prevents '
'urllib3 from configuring SSL appropriately and may cause '
'certain SSL connections to fail. For more information, see '
'https://urllib3.readthedocs.org/en/latest/security.html'
'#insecureplatformwarning.',
InsecurePlatformWarning
)
kwargs = {
'keyfile': self.keyfile,
'certfile': self.certfile,
'ca_certs': self.ca_certs,
'cert_reqs': self.verify_mode,
'ssl_version': self.protocol,
}
if self.supports_set_ciphers: # Platform-specific: Python 2.7+
return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
else: # Platform-specific: Python 2.6
return wrap_socket(socket, **kwargs)
def assert_fingerprint(cert, fingerprint):
"""
Checks if given fingerprint matches the supplied certificate.
@ -26,31 +142,21 @@ def assert_fingerprint(cert, fingerprint):
Fingerprint as string of hexdigits, can be interspersed by colons.
"""
# Maps the length of a digest to a possible hash function producing
# this digest.
hashfunc_map = {
16: md5,
20: sha1
}
fingerprint = fingerprint.replace(':', '').lower()
digest_length, rest = divmod(len(fingerprint), 2)
if rest or digest_length not in hashfunc_map:
raise SSLError('Fingerprint is of invalid length.')
digest_length = len(fingerprint)
hashfunc = HASHFUNC_MAP.get(digest_length)
if not hashfunc:
raise SSLError(
'Fingerprint of invalid length: {0}'.format(fingerprint))
# We need encode() here for py32; works on py2 and p33.
fingerprint_bytes = unhexlify(fingerprint.encode())
hashfunc = hashfunc_map[digest_length]
cert_digest = hashfunc(cert).digest()
if not cert_digest == fingerprint_bytes:
if not _const_compare_digest(cert_digest, fingerprint_bytes):
raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
.format(hexlify(fingerprint_bytes),
hexlify(cert_digest)))
.format(fingerprint, hexlify(cert_digest)))
def resolve_cert_reqs(candidate):
@ -92,42 +198,120 @@ def resolve_ssl_version(candidate):
return candidate
if SSLContext is not None: # Python 3.2+
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None):
"""
All arguments except `server_hostname` have the same meaning as for
:func:`ssl.wrap_socket`
def create_urllib3_context(ssl_version=None, cert_reqs=None,
options=None, ciphers=None):
"""All arguments have the same meaning as ``ssl_wrap_socket``.
:param server_hostname:
Hostname of the expected certificate
"""
context = SSLContext(ssl_version)
context.verify_mode = cert_reqs
By default, this function does a lot of the same work that
``ssl.create_default_context`` does on Python 3.4+. It:
# Disable TLS compression to migitate CRIME attack (issue #309)
OP_NO_COMPRESSION = 0x20000
context.options |= OP_NO_COMPRESSION
- Disables SSLv2, SSLv3, and compression
- Sets a restricted set of server ciphers
if ca_certs:
try:
context.load_verify_locations(ca_certs)
# Py32 raises IOError
# Py33 raises FileNotFoundError
except Exception as e: # Reraise as SSLError
If you wish to enable SSLv3, you can do::
from urllib3.util import ssl_
context = ssl_.create_urllib3_context()
context.options &= ~ssl_.OP_NO_SSLv3
You can do the same to enable compression (substituting ``COMPRESSION``
for ``SSLv3`` in the last line above).
:param ssl_version:
The desired protocol version to use. This will default to
PROTOCOL_SSLv23 which will negotiate the highest protocol that both
the server and your installation of OpenSSL support.
:param cert_reqs:
Whether to require the certificate verification. This defaults to
``ssl.CERT_REQUIRED``.
:param options:
Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
:param ciphers:
Which cipher suites to allow the server to select.
:returns:
Constructed SSLContext object with specified options
:rtype: SSLContext
"""
context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)
# Setting the default here, as we may have no ssl module on import
cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
if options is None:
options = 0
# SSLv2 is easily broken and is considered harmful and dangerous
options |= OP_NO_SSLv2
# SSLv3 has several problems and is now dangerous
options |= OP_NO_SSLv3
# Disable compression to prevent CRIME attacks for OpenSSL 1.0+
# (issue #309)
options |= OP_NO_COMPRESSION
context.options |= options
if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6
context.set_ciphers(ciphers or DEFAULT_CIPHERS)
context.verify_mode = cert_reqs
if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2
# We do our own verification, including fingerprints and alternative
# hostnames. So disable it here
context.check_hostname = False
return context
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None, ciphers=None, ssl_context=None,
ca_cert_dir=None):
"""
All arguments except for server_hostname, ssl_context, and ca_cert_dir have
the same meaning as they do when using :func:`ssl.wrap_socket`.
:param server_hostname:
When SNI is supported, the expected hostname of the certificate
:param ssl_context:
A pre-made :class:`SSLContext` object. If none is provided, one will
be created using :func:`create_urllib3_context`.
:param ciphers:
A string of ciphers we wish the client to support. This is not
supported on Python 2.6 as the ssl module does not support it.
:param ca_cert_dir:
A directory containing CA certificates in multiple separate files, as
supported by OpenSSL's -CApath flag or the capath argument to
SSLContext.load_verify_locations().
"""
context = ssl_context
if context is None:
context = create_urllib3_context(ssl_version, cert_reqs,
ciphers=ciphers)
if ca_certs or ca_cert_dir:
try:
context.load_verify_locations(ca_certs, ca_cert_dir)
except IOError as e: # Platform-specific: Python 2.6, 2.7, 3.2
raise SSLError(e)
# Py33 raises FileNotFoundError which subclasses OSError
# These are not equivalent unless we check the errno attribute
except OSError as e: # Platform-specific: Python 3.3 and beyond
if e.errno == errno.ENOENT:
raise SSLError(e)
if certfile:
# FIXME: This block needs a test.
context.load_cert_chain(certfile, keyfile)
if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
return context.wrap_socket(sock, server_hostname=server_hostname)
return context.wrap_socket(sock)
raise
else: # Python 3.1 and earlier
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
ca_certs=None, server_hostname=None,
ssl_version=None):
return wrap_socket(sock, keyfile=keyfile, certfile=certfile,
ca_certs=ca_certs, cert_reqs=cert_reqs,
ssl_version=ssl_version)
if certfile:
context.load_cert_chain(certfile, keyfile)
if HAS_SNI: # Platform-specific: OpenSSL with enabled SNI
return context.wrap_socket(sock, server_hostname=server_hostname)
warnings.warn(
'An HTTPS request has been made, but the SNI (Subject Name '
'Indication) extension to TLS is not available on this platform. '
'This may cause the server to present an incorrect TLS '
'certificate, which can cause validation failures. For more '
'information, see '
'https://urllib3.readthedocs.org/en/latest/security.html'
'#snimissingwarning.',
SNIMissingWarning
)
return context.wrap_socket(sock)

View file

@ -1,32 +1,51 @@
from __future__ import absolute_import
# The default socket timeout, used by httplib to indicate that no timeout was
# specified by the user
from socket import _GLOBAL_DEFAULT_TIMEOUT
import time
from ..exceptions import TimeoutStateError
# A sentinel value to indicate that no timeout was specified by the user in
# urllib3
_Default = object()
def current_time():
"""
Retrieve the current time, this function is mocked out in unit testing.
Retrieve the current time. This function is mocked out in unit testing.
"""
return time.time()
_Default = object()
# The default timeout to use for socket connections. This is the attribute used
# by httplib to define the default timeout
class Timeout(object):
"""
Utility object for storing timeout values.
""" Timeout configuration.
Example usage:
Timeouts can be defined as a default for a pool::
.. code-block:: python
timeout = Timeout(connect=2.0, read=7.0)
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')
timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
pool = HTTPConnectionPool('www.google.com', 80, timeout=timeout)
pool.request(...) # Etc, etc
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
Timeouts can be disabled by setting all the parameters to ``None``::
no_timeout = Timeout(connect=None, read=None)
response = http.request('GET', 'http://example.com/, timeout=no_timeout)
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
:param connect:
The maximum amount of time to wait for a connection attempt to a server
@ -47,25 +66,15 @@ class Timeout(object):
:type read: integer, float, or None
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response. Specifically, Python's DNS resolver does not obey the
timeout specified on the socket. Other factors that can affect total
request time include high CPU load, high swap, the program running at a
low priority level, or other behaviors. The observed running time for
urllib3 to return a response may be greater than the value passed to
`total`.
an HTTP response.
For example, Python's DNS resolver does not obey the timeout specified
on the socket. Other factors that can affect total request time include
high CPU load, high swap, the program running at a low priority level,
or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
@ -73,8 +82,8 @@ class Timeout(object):
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not ever trigger, even though the request will
take several minutes to complete.
of 20 seconds will not trigger, even though the request will take
several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
@ -94,17 +103,16 @@ class Timeout(object):
return '%s(connect=%r, read=%r, total=%r)' % (
type(self).__name__, self._connect, self._read, self.total)
@classmethod
def _validate_timeout(cls, value, name):
""" Check that a timeout attribute is valid
""" Check that a timeout attribute is valid.
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is used
for clear error messages
:return: the value
:raises ValueError: if the type is not an integer or a float, or if it
is a numeric value less than zero
:param name: The name of the timeout attribute to validate. This is
used to specify in error messages.
:return: The validated and casted version of the given value.
:raises ValueError: If the type is not an integer or a float, or if it
is a numeric value less than zero.
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
@ -123,7 +131,7 @@ class Timeout(object):
raise ValueError("Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than 0." % (name, value))
except TypeError: # Python 3
except TypeError: # Python 3
raise ValueError("Timeout value %s was %s, but it must be an "
"int or float." % (name, value))
@ -135,12 +143,12 @@ class Timeout(object):
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value passed
to this function.
object that sets the individual timeouts to the ``timeout`` value
passed to this function.
:param timeout: The legacy timeout value
:param timeout: The legacy timeout value.
:type timeout: integer, float, sentinel default object, or None
:return: a Timeout object
:return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
@ -174,7 +182,7 @@ class Timeout(object):
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: the elapsed time
:return: Elapsed time.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
@ -191,7 +199,7 @@ class Timeout(object):
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: the connect timeout
:return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
@ -214,16 +222,16 @@ class Timeout(object):
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: the value to use for the read timeout
:return: Value to use for the read timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
"""
if (self.total is not None and
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# in case the connect timeout has not yet been established.
self.total is not self.DEFAULT_TIMEOUT and
self._read is not None and
self._read is not self.DEFAULT_TIMEOUT):
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(),

View file

@ -1,17 +1,25 @@
from __future__ import absolute_import
from collections import namedtuple
from ..exceptions import LocationParseError
class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'])):
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
class Url(namedtuple('Url', url_attrs)):
"""
Datastructure for representing an HTTP URL. Used as a return value for
:func:`parse_url`.
"""
slots = ()
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None):
return super(Url, cls).__new__(cls, scheme, auth, host, port, path, query, fragment)
def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
query=None, fragment=None):
if path and not path.startswith('/'):
path = '/' + path
return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
query, fragment)
@property
def hostname(self):
@ -35,6 +43,49 @@ class Url(namedtuple('Url', ['scheme', 'auth', 'host', 'port', 'path', 'query',
return '%s:%d' % (self.host, self.port)
return self.host
@property
def url(self):
"""
Convert self into a url
This function should more or less round-trip with :func:`.parse_url`. The
returned url may not be exactly the same as the url inputted to
:func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
with a blank port will have : removed).
Example: ::
>>> U = parse_url('http://google.com/mail/')
>>> U.url
'http://google.com/mail/'
>>> Url('http', 'username:password', 'host.com', 80,
... '/path', 'query', 'fragment').url
'http://username:password@host.com:80/path?query#fragment'
"""
scheme, auth, host, port, path, query, fragment = self
url = ''
# We use "is not None" we want things to happen with empty strings (or 0 port)
if scheme is not None:
url += scheme + '://'
if auth is not None:
url += auth + '@'
if host is not None:
url += host
if port is not None:
url += ':' + str(port)
if path is not None:
url += path
if query is not None:
url += '?' + query
if fragment is not None:
url += '#' + fragment
return url
def __str__(self):
return self.url
def split_first(s, delims):
"""
@ -43,7 +94,7 @@ def split_first(s, delims):
If not found, then the first part is the full input string.
Example: ::
Example::
>>> split_first('foo/bar?baz', '?/=')
('foo', 'bar?baz', '/')
@ -66,7 +117,7 @@ def split_first(s, delims):
if min_idx is None or min_idx < 0:
return s, '', None
return s[:min_idx], s[min_idx+1:], min_delim
return s[:min_idx], s[min_idx + 1:], min_delim
def parse_url(url):
@ -76,10 +127,10 @@ def parse_url(url):
Partly backwards-compatible with :mod:`urlparse`.
Example: ::
Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/', ...)
Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
>>> parse_url('google.com:80')
Url(scheme=None, host='google.com', port=80, path=None, ...)
>>> parse_url('/foo?bar')
@ -91,6 +142,10 @@ def parse_url(url):
# Additionally, this implementations does silly things to be optimal
# on CPython.
if not url:
# Empty
return Url()
scheme = None
auth = None
host = None