update path, add python unrar

j 2019-01-29 16:10:06 +05:30
commit 00165d302e
862 changed files with 804 additions and 6 deletions

File diff suppressed because it is too large.

stem/descriptor/certificate.py

@@ -0,0 +1,271 @@
# Copyright 2017-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for `Tor Ed25519 certificates
<https://gitweb.torproject.org/torspec.git/tree/cert-spec.txt>`_, which are
used to validate the key used to sign server descriptors.
.. versionadded:: 1.6.0
**Module Overview:**
::
Ed25519Certificate - Ed25519 signing key certificate
| +- Ed25519CertificateV1 - version 1 Ed25519 certificate
| |- is_expired - checks if certificate is presently expired
| +- validate - validates signature of a server descriptor
|
+- parse - reads base64 encoded certificate data
Ed25519Extension - extension included within an Ed25519Certificate
.. data:: CertType (enum)
Purpose of Ed25519 certificate. As new certificate versions are added this
enumeration will expand.
============== ===========
CertType Description
============== ===========
**SIGNING** signing a signing key with an identity key
**LINK_CERT** TLS link certificate signed with ed25519 signing key
**AUTH** authentication key signed with ed25519 signing key
============== ===========
.. data:: ExtensionType (enum)
Recognized extension types.
==================== ===========
ExtensionType Description
==================== ===========
**HAS_SIGNING_KEY** includes key used to sign the certificate
==================== ===========
.. data:: ExtensionFlag (enum)
Flags that can be assigned to Ed25519 certificate extensions.
====================== ===========
ExtensionFlag Description
====================== ===========
**AFFECTS_VALIDATION** extension affects whether the certificate is valid
**UNKNOWN** extension includes flags not yet recognized by stem
====================== ===========
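For example, a minimal parsing sketch (cert_base64 is a hypothetical variable
holding the base64 blob taken from a server descriptor)...
::
from stem.descriptor.certificate import Ed25519Certificate
cert = Ed25519Certificate.parse(cert_base64)
print('version %i certificate, expires %s' % (cert.version, cert.expiration))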
"""
import base64
import binascii
import collections
import datetime
import hashlib
import stem.prereq
import stem.util.enum
import stem.util.str_tools
ED25519_HEADER_LENGTH = 40
ED25519_SIGNATURE_LENGTH = 64
ED25519_ROUTER_SIGNATURE_PREFIX = b'Tor router descriptor signature v1'
CertType = stem.util.enum.UppercaseEnum('SIGNING', 'LINK_CERT', 'AUTH')
ExtensionType = stem.util.enum.Enum(('HAS_SIGNING_KEY', 4),)
ExtensionFlag = stem.util.enum.UppercaseEnum('AFFECTS_VALIDATION', 'UNKNOWN')
class Ed25519Extension(collections.namedtuple('Ed25519Extension', ['type', 'flags', 'flag_int', 'data'])):
"""
Extension within an Ed25519 certificate.
:var int type: extension type
:var list flags: extension attribute flags
:var int flag_int: integer encoding of the extension attribute flags
:var bytes data: data the extension concerns
"""
class Ed25519Certificate(object):
"""
Base class for an Ed25519 certificate.
:var int version: certificate format version
:var str encoded: base64 encoded ed25519 certificate
"""
def __init__(self, version, encoded):
self.version = version
self.encoded = encoded
@staticmethod
def parse(content):
"""
Parses the given base64 encoded data as an Ed25519 certificate.
:param str content: base64 encoded certificate
:returns: :class:`~stem.descriptor.certificate.Ed25519Certificate` subclass
for the given certificate
:raises: **ValueError** if content is malformed
"""
try:
decoded = base64.b64decode(stem.util.str_tools._to_bytes(content))
if not decoded:
raise TypeError('empty')
except (TypeError, binascii.Error) as exc:
raise ValueError("Ed25519 certificate wasn't propoerly base64 encoded (%s):\n%s" % (exc, content))
version = stem.util.str_tools._to_int(decoded[0:1])
if version == 1:
return Ed25519CertificateV1(version, content, decoded)
else:
raise ValueError('Ed25519 certificate is version %i. Parser presently only supports version 1.' % version)
class Ed25519CertificateV1(Ed25519Certificate):
"""
Version 1 Ed25519 certificate, which is used for signing tor server
descriptors.
:var CertType type: certificate purpose
:var datetime expiration: expiration of the certificate
:var int key_type: format of the key
:var bytes key: key content
:var list extensions: :class:`~stem.descriptor.certificate.Ed25519Extension` in this certificate
:var bytes signature: certificate signature
"""
def __init__(self, version, encoded, decoded):
super(Ed25519CertificateV1, self).__init__(version, encoded)
if len(decoded) < ED25519_HEADER_LENGTH + ED25519_SIGNATURE_LENGTH:
raise ValueError('Ed25519 certificate was %i bytes, but should be at least %i' % (len(decoded), ED25519_HEADER_LENGTH + ED25519_SIGNATURE_LENGTH))
cert_type = stem.util.str_tools._to_int(decoded[1:2])
if cert_type in (0, 1, 2, 3):
raise ValueError('Ed25519 certificate cannot have a type of %i. This is reserved to avoid conflicts with tor CERTS cells.' % cert_type)
elif cert_type == 4:
self.type = CertType.SIGNING
elif cert_type == 5:
self.type = CertType.LINK_CERT
elif cert_type == 6:
self.type = CertType.AUTH
elif cert_type == 7:
raise ValueError('Ed25519 certificate cannot have a type of 7. This is reserved for RSA identity cross-certification.')
else:
raise ValueError("BUG: Ed25519 certificate type is decoded from one byte. It shouldn't be possible to have a value of %i." % cert_type)
# expiration time is in hours since epoch
try:
self.expiration = datetime.datetime.utcfromtimestamp(stem.util.str_tools._to_int(decoded[2:6]) * 3600)
except ValueError as exc:
raise ValueError('Invalid expiration timestamp (%s): %s' % (exc, stem.util.str_tools._to_int(decoded[2:6]) * 3600))
self.key_type = stem.util.str_tools._to_int(decoded[6:7])
self.key = decoded[7:39]
self.signature = decoded[-ED25519_SIGNATURE_LENGTH:]
self.extensions = []
extension_count = stem.util.str_tools._to_int(decoded[39:40])
remaining_data = decoded[40:-ED25519_SIGNATURE_LENGTH]
for i in range(extension_count):
if len(remaining_data) < 4:
raise ValueError('Ed25519 extension is missing header field data')
extension_length = stem.util.str_tools._to_int(remaining_data[:2])
extension_type = stem.util.str_tools._to_int(remaining_data[2:3])
extension_flags = stem.util.str_tools._to_int(remaining_data[3:4])
extension_data = remaining_data[4:4 + extension_length]
if extension_length != len(extension_data):
raise ValueError("Ed25519 extension is truncated. It should have %i bytes of data but there's only %i." % (extension_length, len(extension_data)))
flags, remaining_flags = [], extension_flags
if remaining_flags % 2 == 1:
flags.append(ExtensionFlag.AFFECTS_VALIDATION)
remaining_flags -= 1
if remaining_flags:
flags.append(ExtensionFlag.UNKNOWN)
if extension_type == ExtensionType.HAS_SIGNING_KEY and len(extension_data) != 32:
raise ValueError('Ed25519 HAS_SIGNING_KEY extension must be 32 bytes, but was %i.' % len(extension_data))
self.extensions.append(Ed25519Extension(extension_type, flags, extension_flags, extension_data))
remaining_data = remaining_data[4 + extension_length:]
if remaining_data:
raise ValueError('Ed25519 certificate had %i bytes of unused extension data' % len(remaining_data))
def is_expired(self):
"""
Checks if this certificate is presently expired or not.
:returns: **True** if the certificate has expired, **False** otherwise
"""
return datetime.datetime.now() > self.expiration
def validate(self, server_descriptor):
"""
Validates our signing key and that the given descriptor content matches its
Ed25519 signature.
:param stem.descriptor.server_descriptor.Ed25519 server_descriptor: relay
server descriptor to validate
:raises:
* **ValueError** if signing key or descriptor are invalid
* **ImportError** if pynacl module is unavailable
"""
if not stem.prereq._is_pynacl_available():
raise ImportError('Certificate validation requires the pynacl module')
import nacl.signing
import nacl.encoding
from nacl.exceptions import BadSignatureError
descriptor_content = server_descriptor.get_bytes()
signing_key = None
if server_descriptor.ed25519_master_key:
signing_key = nacl.signing.VerifyKey(stem.util.str_tools._to_bytes(server_descriptor.ed25519_master_key) + b'=', encoder = nacl.encoding.Base64Encoder)
else:
for extension in self.extensions:
if extension.type == ExtensionType.HAS_SIGNING_KEY:
signing_key = nacl.signing.VerifyKey(extension.data)
break
if not signing_key:
raise ValueError('Server descriptor missing an ed25519 signing key')
try:
signing_key.verify(base64.b64decode(stem.util.str_tools._to_bytes(self.encoded))[:-ED25519_SIGNATURE_LENGTH], self.signature)
except BadSignatureError as exc:
raise ValueError('Ed25519KeyCertificate signing key is invalid (%s)' % exc)
# ed25519 signature validates descriptor content up until the signature itself
if b'router-sig-ed25519 ' not in descriptor_content:
raise ValueError("Descriptor doesn't have a router-sig-ed25519 entry.")
signed_content = descriptor_content[:descriptor_content.index(b'router-sig-ed25519 ') + 19]  # 19 is len(b'router-sig-ed25519 '), so the keyword and trailing space are included
descriptor_sha256_digest = hashlib.sha256(ED25519_ROUTER_SIGNATURE_PREFIX + signed_content).digest()
missing_padding = len(server_descriptor.ed25519_signature) % 4
signature_bytes = base64.b64decode(stem.util.str_tools._to_bytes(server_descriptor.ed25519_signature) + b'=' * missing_padding)
try:
verify_key = nacl.signing.VerifyKey(self.key)
verify_key.verify(descriptor_sha256_digest, signature_bytes)
except BadSignatureError as exc:
raise ValueError('Descriptor Ed25519 certificate signature invalid (%s)' % exc)
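# Example (sketch): exercising validate() through a relay server descriptor,
# assuming pynacl is installed and descriptor_content is a hypothetical
# variable holding the raw bytes of a server descriptor...
#
#   from stem.descriptor.server_descriptor import RelayDescriptor
#   desc = RelayDescriptor(descriptor_content, validate = True)
#   desc.certificate.validate(desc)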

stem/descriptor/export.py

@@ -0,0 +1,115 @@
# Copyright 2012-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Toolkit for exporting descriptors to other formats.
**Module Overview:**
::
export_csv - Exports descriptors to a CSV
export_csv_file - Writes exported CSV output to a file
.. deprecated:: 1.7.0
This module will likely be removed in Stem 2.0 due to lack of usage. If you
use this module please `let me know <https://www.atagar.com/contact/>`_.
"""
import csv
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
import stem.descriptor
import stem.prereq
class _ExportDialect(csv.excel):
lineterminator = '\n'
def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
"""
Provides a newline separated CSV for one or more descriptors. If simply
provided with descriptors then the CSV contains all of their attributes,
labeled with a header row. Either 'included_fields' or 'excluded_fields' can
be used for more granular control over its attributes and the order.
:param Descriptor,list descriptors: either a
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
:param list included_fields: attributes to include in the csv
:param list excluded_fields: attributes to exclude from the csv
:param bool header: if **True** then the first line will be a comma separated
list of the attribute names (**only supported in python 2.7 and higher**)
:returns: **str** of the CSV for the descriptors, one per line
:raises: **ValueError** if descriptors contain more than one descriptor type
"""
output_buffer = StringIO()
export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
return output_buffer.getvalue()
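# Example usage (a sketch; the cached descriptor path is hypothetical)...
#
#   from stem.descriptor import parse_file
#   from stem.descriptor.export import export_csv
#
#   descriptors = list(parse_file('/tmp/cached-descriptors'))
#   print(export_csv(descriptors, included_fields = ('nickname', 'address')))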
def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
"""
Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
written directly to a file.
:param file output_file: file to be written to
:param Descriptor,list descriptors: either a
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
:param list included_fields: attributes to include in the csv
:param list excluded_fields: attributes to exclude from the csv
:param bool header: if **True** then the first line will be a comma separated
list of the attribute names (**only supported in python 2.7 and higher**)
:returns: **str** of the CSV for the descriptors, one per line
:raises: **ValueError** if descriptors contain more than one descriptor type
"""
if isinstance(descriptors, stem.descriptor.Descriptor):
descriptors = (descriptors,)
if not descriptors:
return
descriptor_type = type(descriptors[0])
descriptor_type_label = descriptor_type.__name__
included_fields = list(included_fields)
# If the user didn't specify the fields to include then export everything,
# ordered alphabetically. If they did specify fields then make sure that
# they exist.
desc_attr = sorted(vars(descriptors[0]).keys())
if included_fields:
for field in included_fields:
if field not in desc_attr:
raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ', '.join(desc_attr)))
else:
included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
for field in excluded_fields:
try:
included_fields.remove(field)
except ValueError:
pass
writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
if header and not stem.prereq._is_python_26():
writer.writeheader()
for desc in descriptors:
if not isinstance(desc, stem.descriptor.Descriptor):
raise ValueError('Unable to export a descriptor CSV since %s is not a descriptor.' % type(desc).__name__)
elif descriptor_type != type(desc):
raise ValueError('To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s.' % (descriptor_type_label, type(desc)))
writer.writerow(vars(desc))

File diff suppressed because it is too large.

stem/descriptor/hidden_service_descriptor.py

@@ -0,0 +1,438 @@
# Copyright 2015-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor hidden service descriptors as described in Tor's `rend-spec
<https://gitweb.torproject.org/torspec.git/tree/rend-spec.txt>`_.
Unlike other descriptor types these describe a hidden service rather than a
relay. They're created by the service, and can only be fetched via relays with
the HSDir flag.
These are only available through the Controller's
:func:`~stem.control.Controller.get_hidden_service_descriptor` method.
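For example (a sketch; the onion address below is just a placeholder)...
::
from stem.control import Controller
with Controller.from_port(port = 9051) as controller:
controller.authenticate()
desc = controller.get_hidden_service_descriptor('3g2upl4pq6kufc4m')
print(desc.introduction_points())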
**Module Overview:**
::
HiddenServiceDescriptor - Tor hidden service descriptor.
.. versionadded:: 1.4.0
"""
import base64
import binascii
import collections
import hashlib
import io
import stem.prereq
import stem.util.connection
import stem.util.str_tools
from stem.descriptor import (
PGP_BLOCK_END,
Descriptor,
_descriptor_content,
_descriptor_components,
_read_until_keywords,
_bytes_for_block,
_value,
_parse_simple_line,
_parse_timestamp_line,
_parse_key_block,
_random_date,
_random_crypto_blob,
)
if stem.prereq._is_lru_cache_available():
from functools import lru_cache
else:
from stem.util.lru_cache import lru_cache
REQUIRED_FIELDS = (
'rendezvous-service-descriptor',
'version',
'permanent-key',
'secret-id-part',
'publication-time',
'protocol-versions',
'signature',
)
INTRODUCTION_POINTS_ATTR = {
'identifier': None,
'address': None,
'port': None,
'onion_key': None,
'service_key': None,
'intro_authentication': [],
}
# introduction-point fields that can only appear once
SINGLE_INTRODUCTION_POINT_FIELDS = [
'introduction-point',
'ip-address',
'onion-port',
'onion-key',
'service-key',
]
BASIC_AUTH = 1
STEALTH_AUTH = 2
class IntroductionPoints(collections.namedtuple('IntroductionPoints', INTRODUCTION_POINTS_ATTR.keys())):
"""
:var str identifier: hash of this introduction point's identity key
:var str address: address of this introduction point
:var int port: port where this introduction point is listening
:var str onion_key: public key for communicating with this introduction point
:var str service_key: public key for communicating with this hidden service
:var list intro_authentication: tuples of the form (auth_type, auth_data) for
establishing a connection
"""
class DecryptionFailure(Exception):
"""
Failure to decrypt the hidden service descriptor's introduction-points.
"""
def _parse_file(descriptor_file, validate = False, **kwargs):
"""
Iterates over the hidden service descriptors in a file.
:param file descriptor_file: file with descriptor content
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for :class:`~stem.descriptor.hidden_service_descriptor.HiddenServiceDescriptor`
instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is **True**
* **IOError** if the file can't be read
"""
while True:
descriptor_content = _read_until_keywords('signature', descriptor_file)
# we've reached the 'signature', now include the pgp style block
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
if descriptor_content:
if descriptor_content[0].startswith(b'@type'):
descriptor_content = descriptor_content[1:]
yield HiddenServiceDescriptor(bytes.join(b'', descriptor_content), validate, **kwargs)
else:
break # done parsing file
def _parse_version_line(descriptor, entries):
value = _value('version', entries)
if value.isdigit():
descriptor.version = int(value)
else:
raise ValueError('version line must have a positive integer value: %s' % value)
def _parse_protocol_versions_line(descriptor, entries):
value = _value('protocol-versions', entries)
try:
versions = [int(entry) for entry in value.split(',')]
except ValueError:
raise ValueError('protocol-versions line has non-numeric versions: protocol-versions %s' % value)
for v in versions:
if v <= 0:
raise ValueError('protocol-versions must be positive integers: %s' % value)
descriptor.protocol_versions = versions
def _parse_introduction_points_line(descriptor, entries):
_, block_type, block_contents = entries['introduction-points'][0]
if not block_contents or block_type != 'MESSAGE':
raise ValueError("'introduction-points' should be followed by a MESSAGE block, but was a %s" % block_type)
descriptor.introduction_points_encoded = block_contents
descriptor.introduction_points_auth = [] # field was never implemented in tor (#15190)
try:
descriptor.introduction_points_content = _bytes_for_block(block_contents)
except TypeError:
raise ValueError("'introduction-points' isn't base64 encoded content:\n%s" % block_contents)
_parse_rendezvous_service_descriptor_line = _parse_simple_line('rendezvous-service-descriptor', 'descriptor_id')
_parse_permanent_key_line = _parse_key_block('permanent-key', 'permanent_key', 'RSA PUBLIC KEY')
_parse_secret_id_part_line = _parse_simple_line('secret-id-part', 'secret_id_part')
_parse_publication_time_line = _parse_timestamp_line('publication-time', 'published')
_parse_signature_line = _parse_key_block('signature', 'signature', 'SIGNATURE')
class HiddenServiceDescriptor(Descriptor):
"""
Hidden service descriptor.
:var str descriptor_id: **\*** identifier for this descriptor, this is a base32 hash of several fields
:var int version: **\*** hidden service descriptor version
:var str permanent_key: **\*** long term key of the hidden service
:var str secret_id_part: **\*** hash of the time period, cookie, and replica
values so our descriptor_id can be validated
:var datetime published: **\*** time in UTC when this descriptor was made
:var list protocol_versions: **\*** list of **int** versions that are supported when establishing a connection
:var str introduction_points_encoded: raw introduction points blob
:var list introduction_points_auth: **\*** tuples of the form
(auth_method, auth_data) for our introduction_points_content
(**deprecated**, always **[]**)
:var bytes introduction_points_content: decoded introduction-points content
without authentication data, if using cookie authentication this is
encrypted
:var str signature: signature of the descriptor content
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
.. versionchanged:: 1.6.0
Moved from the deprecated `pycrypto
<https://www.dlitz.net/software/pycrypto/>`_ module to `cryptography
<https://pypi.python.org/pypi/cryptography>`_ for validating signatures.
.. versionchanged:: 1.6.0
Added the **skip_crypto_validation** constructor argument.
"""
ATTRIBUTES = {
'descriptor_id': (None, _parse_rendezvous_service_descriptor_line),
'version': (None, _parse_version_line),
'permanent_key': (None, _parse_permanent_key_line),
'secret_id_part': (None, _parse_secret_id_part_line),
'published': (None, _parse_publication_time_line),
'protocol_versions': ([], _parse_protocol_versions_line),
'introduction_points_encoded': (None, _parse_introduction_points_line),
'introduction_points_auth': ([], _parse_introduction_points_line),
'introduction_points_content': (None, _parse_introduction_points_line),
'signature': (None, _parse_signature_line),
}
PARSER_FOR_LINE = {
'rendezvous-service-descriptor': _parse_rendezvous_service_descriptor_line,
'version': _parse_version_line,
'permanent-key': _parse_permanent_key_line,
'secret-id-part': _parse_secret_id_part_line,
'publication-time': _parse_publication_time_line,
'protocol-versions': _parse_protocol_versions_line,
'introduction-points': _parse_introduction_points_line,
'signature': _parse_signature_line,
}
@classmethod
def content(cls, attr = None, exclude = (), sign = False):
if sign:
raise NotImplementedError('Signing of %s not implemented' % cls.__name__)
return _descriptor_content(attr, exclude, (
('rendezvous-service-descriptor', 'y3olqqblqw2gbh6phimfuiroechjjafa'),
('version', '2'),
('permanent-key', _random_crypto_blob('RSA PUBLIC KEY')),
('secret-id-part', 'e24kgecavwsznj7gpbktqsiwgvngsf4e'),
('publication-time', _random_date()),
('protocol-versions', '2,3'),
('introduction-points', '\n-----BEGIN MESSAGE-----\n-----END MESSAGE-----'),
), (
('signature', _random_crypto_blob('SIGNATURE')),
))
@classmethod
def create(cls, attr = None, exclude = (), validate = True, sign = False):
return cls(cls.content(attr, exclude, sign), validate = validate, skip_crypto_validation = not sign)
def __init__(self, raw_contents, validate = False, skip_crypto_validation = False):
super(HiddenServiceDescriptor, self).__init__(raw_contents, lazy_load = not validate)
entries = _descriptor_components(raw_contents, validate, non_ascii_fields = ('introduction-points',))
if validate:
for keyword in REQUIRED_FIELDS:
if keyword not in entries:
raise ValueError("Hidden service descriptor must have a '%s' entry" % keyword)
elif keyword in entries and len(entries[keyword]) > 1:
raise ValueError("The '%s' entry can only appear once in a hidden service descriptor" % keyword)
if 'rendezvous-service-descriptor' != list(entries.keys())[0]:
raise ValueError("Hidden service descriptor must start with a 'rendezvous-service-descriptor' entry")
elif 'signature' != list(entries.keys())[-1]:
raise ValueError("Hidden service descriptor must end with a 'signature' entry")
self._parse(entries, validate)
if not skip_crypto_validation and stem.prereq.is_crypto_available():
signed_digest = self._digest_for_signature(self.permanent_key, self.signature)
content_digest = self._digest_for_content(b'rendezvous-service-descriptor ', b'\nsignature\n')
if signed_digest != content_digest:
raise ValueError('Decrypted digest does not match local digest (calculated: %s, local: %s)' % (signed_digest, content_digest))
else:
self._entries = entries
@lru_cache()
def introduction_points(self, authentication_cookie = None):
"""
Provides this service's introduction points.
:returns: **list** of :class:`~stem.descriptor.hidden_service_descriptor.IntroductionPoints`
:raises:
* **ValueError** if our introduction-points are malformed
* **DecryptionFailure** if unable to decrypt this field
"""
content = self.introduction_points_content
if not content:
return []
elif authentication_cookie:
if not stem.prereq.is_crypto_available():
raise DecryptionFailure('Decrypting introduction-points requires the cryptography module')
try:
missing_padding = len(authentication_cookie) % 4
authentication_cookie = base64.b64decode(stem.util.str_tools._to_bytes(authentication_cookie) + b'=' * missing_padding)
except TypeError as exc:
raise DecryptionFailure('authentication_cookie must be a base64 encoded string (%s)' % exc)
authentication_type = int(binascii.hexlify(content[0:1]), 16)
if authentication_type == BASIC_AUTH:
content = HiddenServiceDescriptor._decrypt_basic_auth(content, authentication_cookie)
elif authentication_type == STEALTH_AUTH:
content = HiddenServiceDescriptor._decrypt_stealth_auth(content, authentication_cookie)
else:
raise DecryptionFailure("Unrecognized authentication type '%s', currently we only support basic auth (%s) and stealth auth (%s)" % (authentication_type, BASIC_AUTH, STEALTH_AUTH))
if not content.startswith(b'introduction-point '):
raise DecryptionFailure('Unable to decrypt the introduction-points, maybe this is the wrong key?')
elif not content.startswith(b'introduction-point '):  # no cookie was given, so the content should already be plaintext
raise DecryptionFailure('introduction-points content is encrypted, you need to provide its authentication_cookie')
return HiddenServiceDescriptor._parse_introduction_points(content)
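# Example (sketch): listing where this service's introduction points listen,
# with my_cookie as a hypothetical base64 authentication cookie...
#
#   for intro in desc.introduction_points(authentication_cookie = my_cookie):
#     print('%s:%i' % (intro.address, intro.port))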
@staticmethod
def _decrypt_basic_auth(content, authentication_cookie):
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
try:
client_blocks = int(binascii.hexlify(content[1:2]), 16)
except ValueError:
raise DecryptionFailure("When using basic auth the content should start with a number of blocks but wasn't a hex digit: %s" % binascii.hexlify(content[1:2]))
# parse the client id and encrypted session keys
client_entries_length = client_blocks * 16 * 20
client_entries = content[2:2 + client_entries_length]
client_keys = [(client_entries[i:i + 4], client_entries[i + 4:i + 20]) for i in range(0, client_entries_length, 4 + 16)]
iv = content[2 + client_entries_length:2 + client_entries_length + 16]
encrypted = content[2 + client_entries_length + 16:]
client_id = hashlib.sha1(authentication_cookie + iv).digest()[:4]
for entry_id, encrypted_session_key in client_keys:
if entry_id != client_id:
continue # not the session key for this client
# try decrypting the session key
cipher = Cipher(algorithms.AES(authentication_cookie), modes.CTR(b'\x00' * len(iv)), default_backend())
decryptor = cipher.decryptor()
session_key = decryptor.update(encrypted_session_key) + decryptor.finalize()
# attempt to decrypt the intro points with the session key
cipher = Cipher(algorithms.AES(session_key), modes.CTR(iv), default_backend())
decryptor = cipher.decryptor()
decrypted = decryptor.update(encrypted) + decryptor.finalize()
# check if the decryption looks correct
if decrypted.startswith(b'introduction-point '):
return decrypted
return content # nope, unable to decrypt the content
@staticmethod
def _decrypt_stealth_auth(content, authentication_cookie):
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
# byte 1 = authentication type, 2-17 = input vector, 18 on = encrypted content
iv, encrypted = content[1:17], content[17:]
cipher = Cipher(algorithms.AES(authentication_cookie), modes.CTR(iv), default_backend())
decryptor = cipher.decryptor()
return decryptor.update(encrypted) + decryptor.finalize()
@staticmethod
def _parse_introduction_points(content):
"""
Provides the parsed list of IntroductionPoints for the unencrypted content.
"""
introduction_points = []
content_io = io.BytesIO(content)
while True:
content = b''.join(_read_until_keywords('introduction-point', content_io, ignore_first = True))
if not content:
break # reached the end
attr = dict(INTRODUCTION_POINTS_ATTR)
entries = _descriptor_components(content, False)
for keyword, values in list(entries.items()):
value, block_type, block_contents = values[0]
if keyword in SINGLE_INTRODUCTION_POINT_FIELDS and len(values) > 1:
raise ValueError("'%s' can only appear once in an introduction-point block, but appeared %i times" % (keyword, len(values)))
if keyword == 'introduction-point':
attr['identifier'] = value
elif keyword == 'ip-address':
if not stem.util.connection.is_valid_ipv4_address(value):
raise ValueError("'%s' is an invalid IPv4 address" % value)
attr['address'] = value
elif keyword == 'onion-port':
if not stem.util.connection.is_valid_port(value):
raise ValueError("'%s' is an invalid port" % value)
attr['port'] = int(value)
elif keyword == 'onion-key':
attr['onion_key'] = block_contents
elif keyword == 'service-key':
attr['service_key'] = block_contents
elif keyword == 'intro-authentication':
auth_entries = []
for auth_value, _, _ in values:
if ' ' not in auth_value:
raise ValueError("We expected 'intro-authentication [auth_type] [auth_data]', but had '%s'" % auth_value)
auth_type, auth_data = auth_value.split(' ')[:2]
auth_entries.append((auth_type, auth_data))
attr['intro_authentication'] = auth_entries  # record the parsed (auth_type, auth_data) tuples
introduction_points.append(IntroductionPoints(**attr))
return introduction_points

stem/descriptor/microdescriptor.py

@@ -0,0 +1,361 @@
# Copyright 2013-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for Tor microdescriptors, which contain a distilled version of a
relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer
downloads server descriptors by default, opting for microdescriptors instead.
Unlike most descriptor documents these aren't available on the metrics site
(since they don't contain any information that the server descriptors don't).
The limited information in microdescriptors makes them rather clunky to use
compared with server descriptors. For instance microdescriptors lack the
relay's fingerprint, making it difficult to use them to look up the relay's
other descriptors.
To do so you need to match the microdescriptor's digest against its
corresponding router status entry. For added fun as of this writing the
controller doesn't even surface those router status entries
(:trac:`7953`).
For instance, here's an example that prints the nickname and fingerprints of
the exit relays.
::
import os
from stem.control import Controller
from stem.descriptor import parse_file
with Controller.from_port(port = 9051) as controller:
controller.authenticate()
exit_digests = set()
data_dir = controller.get_conf('DataDirectory')
for desc in controller.get_microdescriptors():
if desc.exit_policy.is_exiting_allowed():
exit_digests.add(desc.digest)
print 'Exit Relays:'
for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')):
if desc.digest in exit_digests:
print ' %s (%s)' % (desc.nickname, desc.fingerprint)
Doing the same is trivial with server descriptors...
::
from stem.descriptor import parse_file
print 'Exit Relays:'
for desc in parse_file('/home/atagar/.tor/cached-descriptors'):
if desc.exit_policy.is_exiting_allowed():
print ' %s (%s)' % (desc.nickname, desc.fingerprint)
**Module Overview:**
::
Microdescriptor - Tor microdescriptor.
"""
import hashlib
import stem.exit_policy
import stem.prereq
from stem.descriptor import (
Descriptor,
_descriptor_content,
_descriptor_components,
_read_until_keywords,
_values,
_parse_simple_line,
_parse_protocol_line,
_parse_key_block,
_random_crypto_blob,
)
from stem.descriptor.router_status_entry import (
_parse_a_line,
_parse_p_line,
)
if stem.prereq._is_lru_cache_available():
from functools import lru_cache
else:
from stem.util.lru_cache import lru_cache
REQUIRED_FIELDS = (
'onion-key',
)
SINGLE_FIELDS = (
'onion-key',
'ntor-onion-key',
'family',
'p',
'p6',
'pr',
)
def _parse_file(descriptor_file, validate = False, **kwargs):
"""
Iterates over the microdescriptors in a file.
:param file descriptor_file: file with descriptor content
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param dict kwargs: additional arguments for the descriptor constructor
:returns: iterator for Microdescriptor instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is True
* **IOError** if the file can't be read
"""
while True:
annotations = _read_until_keywords('onion-key', descriptor_file)
# read until we reach an annotation or onion-key line
descriptor_lines = []
# read the onion-key line, done if we're at the end of the document
onion_key_line = descriptor_file.readline()
if onion_key_line:
descriptor_lines.append(onion_key_line)
else:
break
while True:
last_position = descriptor_file.tell()
line = descriptor_file.readline()
if not line:
break # EOF
elif line.startswith(b'@') or line.startswith(b'onion-key'):
descriptor_file.seek(last_position)
break
else:
descriptor_lines.append(line)
if descriptor_lines:
if descriptor_lines[0].startswith(b'@type'):
descriptor_lines = descriptor_lines[1:]
# strip newlines from annotations
annotations = list(map(bytes.strip, annotations))
descriptor_text = bytes.join(b'', descriptor_lines)
yield Microdescriptor(descriptor_text, validate, annotations, **kwargs)
else:
break # done parsing descriptors
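# Example (sketch): reading microdescriptors from tor's cache file, assuming
# a conventional data directory...
#
#   import stem.descriptor
#
#   path = '/home/atagar/.tor/cached-microdescs'
#
#   for desc in stem.descriptor.parse_file(path, 'microdescriptor 1.0'):
#     print(desc.digest)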
def _parse_id_line(descriptor, entries):
identities = {}
for entry in _values('id', entries):
entry_comp = entry.split()
if len(entry_comp) >= 2:
key_type, key_value = entry_comp[0], entry_comp[1]
if key_type in identities:
raise ValueError("There can only be one 'id' line per a key type, but '%s' appeared multiple times" % key_type)
descriptor.identifier_type = key_type
descriptor.identifier = key_value
identities[key_type] = key_value
else:
raise ValueError("'id' lines should contain both the key type and digest: id %s" % entry)
descriptor.identifiers = identities
def _parse_digest(descriptor, entries):
setattr(descriptor, 'digest', hashlib.sha256(descriptor.get_bytes()).hexdigest().upper())
_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
_parse_ntor_onion_key_line = _parse_simple_line('ntor-onion-key', 'ntor_onion_key')
_parse_family_line = _parse_simple_line('family', 'family', func = lambda v: v.split(' '))
_parse_p6_line = _parse_simple_line('p6', 'exit_policy_v6', func = lambda v: stem.exit_policy.MicroExitPolicy(v))
_parse_pr_line = _parse_protocol_line('pr', 'protocols')
class Microdescriptor(Descriptor):
"""
Microdescriptor (`descriptor specification
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_)
:var str digest: **\*** hex digest for this microdescriptor, this can be used
to match against the corresponding digest attribute of a
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
:var str onion_key: **\*** key used to encrypt EXTEND cells
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
:var list or_addresses: **\*** alternative for our address/or_port attributes, each
entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
(**bool**))
:var list family: **\*** nicknames or fingerprints of declared family
:var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
:var hash identifiers: mapping of key types (like rsa1024 or ed25519) to
their base64 encoded identity, this is only used for collision prevention
(:trac:`11743`)
:var dict protocols: mapping of protocols to their supported versions
:var str identifier: base64 encoded identity digest (**deprecated**, use
identifiers instead)
:var str identifier_type: identity digest key type (**deprecated**, use
identifiers instead)
**\*** attribute is required when we're parsed with validation
.. versionchanged:: 1.1.0
Added the identifier and identifier_type attributes.
.. versionchanged:: 1.5.0
Added the identifiers attribute, and deprecated identifier and
identifier_type since the field can now appear multiple times.
.. versionchanged:: 1.6.0
Added the protocols attribute.
"""
ATTRIBUTES = {
'onion_key': (None, _parse_onion_key_line),
'ntor_onion_key': (None, _parse_ntor_onion_key_line),
'or_addresses': ([], _parse_a_line),
'family': ([], _parse_family_line),
'exit_policy': (stem.exit_policy.MicroExitPolicy('reject 1-65535'), _parse_p_line),
'exit_policy_v6': (None, _parse_p6_line),
'identifier_type': (None, _parse_id_line), # deprecated in favor of identifiers
'identifier': (None, _parse_id_line), # deprecated in favor of identifiers
'identifiers': ({}, _parse_id_line),
'protocols': ({}, _parse_pr_line),
'digest': (None, _parse_digest),
}
PARSER_FOR_LINE = {
'onion-key': _parse_onion_key_line,
'ntor-onion-key': _parse_ntor_onion_key_line,
'a': _parse_a_line,
'family': _parse_family_line,
'p': _parse_p_line,
'p6': _parse_p6_line,
'pr': _parse_pr_line,
'id': _parse_id_line,
}
@classmethod
def content(cls, attr = None, exclude = (), sign = False):
if sign:
raise NotImplementedError('Signing of %s not implemented' % cls.__name__)
return _descriptor_content(attr, exclude, (
('onion-key', _random_crypto_blob('RSA PUBLIC KEY')),
))
def __init__(self, raw_contents, validate = False, annotations = None):
super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
self._annotation_lines = annotations if annotations else []
entries = _descriptor_components(raw_contents, validate)
if validate:
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
self._parse(entries, validate)
self._check_constraints(entries)
else:
self._entries = entries
@lru_cache()
def get_annotations(self):
"""
Provides content that appeared prior to the descriptor. If this comes from
the cached-microdescs then this commonly contains content like...
::
@last-listed 2013-02-24 00:18:30
:returns: **dict** with the key/value pairs in our annotations
"""
annotation_dict = {}
for line in self._annotation_lines:
if b' ' in line:
key, value = line.split(b' ', 1)
annotation_dict[key] = value
else:
annotation_dict[line] = None
return annotation_dict
def get_annotation_lines(self):
"""
Provides the lines of content that appeared prior to the descriptor. This
is the same as the
:func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
results, but with the unparsed lines and ordering retained.
:returns: **list** with the lines of annotation that came before this descriptor
"""
return self._annotation_lines
def _check_constraints(self, entries):
"""
Does a basic check that the entries conform to this descriptor type's
constraints.
:param dict entries: keyword => (value, pgp key) entries
:raises: **ValueError** if an issue arises in validation
"""
for keyword in REQUIRED_FIELDS:
if keyword not in entries:
raise ValueError("Microdescriptor must have a '%s' entry" % keyword)
for keyword in SINGLE_FIELDS:
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)
if 'onion-key' != list(entries.keys())[0]:
raise ValueError("Microdescriptor must start with a 'onion-key' entry")
def _name(self, is_plural = False):
return 'microdescriptors' if is_plural else 'microdescriptor'
def _compare(self, other, method):
if not isinstance(other, Microdescriptor):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __ne__(self, other):
return not self == other
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)

File diff suppressed because it is too large.

stem/descriptor/reader.py

@@ -0,0 +1,572 @@
# Copyright 2012-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utilities for reading descriptors from local directories and archives. This is
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
class, which is an iterator for the descriptor data in a series of
destinations. For example...
::
my_descriptors = [
'/tmp/server-descriptors-2012-03.tar.bz2',
'/tmp/archived_descriptors/',
]
# prints the contents of all the descriptor files
with DescriptorReader(my_descriptors) as reader:
for descriptor in reader:
print descriptor
This ignores files that cannot be processed due to read errors or unparsable
content. To be notified of skipped files you can register a listener with
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.
The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
modified timestamps for descriptor files that it has read so it can skip
unchanged files if run again. This listing of processed files can also be
persisted and applied to other
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
following prints descriptors as they're changed over the course of a minute,
and picks up where it left off if run again...
::
reader = DescriptorReader(['/tmp/descriptor_data'])
try:
processed_files = load_processed_files('/tmp/used_descriptors')
reader.set_processed_files(processed_files)
except: pass # could not load, maybe this is the first run
start_time = time.time()
while (time.time() - start_time) < 60:
# prints any descriptors that have changed since last checked
with reader:
for descriptor in reader:
print descriptor
time.sleep(1)
save_processed_files('/tmp/used_descriptors', reader.get_processed_files())
**Module Overview:**
::
load_processed_files - Loads a listing of processed files
save_processed_files - Saves a listing of processed files
DescriptorReader - Iterator for descriptor data on the local file system
|- get_processed_files - provides the listing of files that we've processed
|- set_processed_files - sets our tracking of the files we have processed
|- register_read_listener - adds a listener for when files are read
|- register_skip_listener - adds a listener that's notified of skipped files
|- start - begins reading descriptor data
|- stop - stops reading descriptor data
|- __enter__ / __exit__ - manages the descriptor reader thread in the context
+- __iter__ - iterates over descriptor data in unread files
FileSkipped - Base exception for a file that was skipped
|- AlreadyRead - We've already read a file with this last modified timestamp
|- ParsingFailure - Contents can't be parsed as descriptor data
|- UnrecognizedType - File extension indicates non-descriptor data
+- ReadFailed - Wraps an error that was raised while reading the file
+- FileMissing - File does not exist
"""
import mimetypes
import os
import tarfile
import threading
try:
import queue
except ImportError:
import Queue as queue
import stem.descriptor
import stem.prereq
import stem.util
import stem.util.str_tools
import stem.util.system
# flag to indicate when the reader thread is out of descriptor files to read
FINISHED = 'DONE'
class FileSkipped(Exception):
"Base error when we can't provide descriptor data from a file."
class AlreadyRead(FileSkipped):
"""
Already read a file with this 'last modified' timestamp or later.
:param int last_modified: unix timestamp for when the file was last modified
:param int last_modified_when_read: unix timestamp for the modification time
when we last read this file
"""
def __init__(self, last_modified, last_modified_when_read):
super(AlreadyRead, self).__init__('File has already been read since it was last modified. modification time: %s, last read: %s' % (last_modified, last_modified_when_read))
self.last_modified = last_modified
self.last_modified_when_read = last_modified_when_read
class ParsingFailure(FileSkipped):
"""
File contents could not be parsed as descriptor data.
:param ValueError exception: issue that arose when parsing
"""
def __init__(self, parsing_exception):
super(ParsingFailure, self).__init__(parsing_exception)
self.exception = parsing_exception
class UnrecognizedType(FileSkipped):
"""
File doesn't contain descriptor data. This could either be due to its file
type or because it doesn't conform to a recognizable descriptor type.
:param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
"""
def __init__(self, mime_type):
super(UnrecognizedType, self).__init__('Unrecognized mime type: %s (%s)' % mime_type)
self.mime_type = mime_type
class ReadFailed(FileSkipped):
"""
An IOError occurred while trying to read the file.
:param IOError exception: issue that arose when reading the file, **None** if
this arose due to the file not being present
"""
def __init__(self, read_exception):
super(ReadFailed, self).__init__(read_exception)
self.exception = read_exception
class FileMissing(ReadFailed):
'File does not exist.'
def __init__(self):
super(FileMissing, self).__init__('File does not exist')
def load_processed_files(path):
"""
Loads a dictionary of 'path => last modified timestamp' mappings, as
persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
file.
:param str path: location to load the processed files dictionary from
:returns: **dict** of 'path (**str**) => last modified unix timestamp
(**int**)' mappings
:raises:
* **IOError** if unable to read the file
* **TypeError** if unable to parse the file's contents
"""
processed_files = {}
with open(path, 'rb') as input_file:
for line in input_file.readlines():
line = stem.util.str_tools._to_unicode(line.strip())
if not line:
continue # skip blank lines
if ' ' not in line:
raise TypeError('Malformed line: %s' % line)
path, timestamp = line.rsplit(' ', 1)
if not os.path.isabs(path):
raise TypeError("'%s' is not an absolute path" % path)
elif not timestamp.isdigit():
raise TypeError("'%s' is not an integer timestamp" % timestamp)
processed_files[path] = int(timestamp)
return processed_files
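# The persisted listing is one 'absolute path <space> timestamp' pair per
# line, for example (hypothetical content)...
#
#   /tmp/archived_descriptors/server-descriptors-2012-03.tar.bz2 1330643200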
def save_processed_files(path, processed_files):
"""
Persists a dictionary of 'path => last modified timestamp' mappings (as
provided by the DescriptorReader's
:func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
so that they can be loaded later and applied to another
:class:`~stem.descriptor.reader.DescriptorReader`.
:param str path: location to save the processed files dictionary to
:param dict processed_files: 'path => last modified' mappings
:raises:
* **IOError** if unable to write to the file
* **TypeError** if processed_files is of the wrong type
"""
# makes the parent directory if it doesn't already exist
try:
path_dir = os.path.dirname(path)
if not os.path.exists(path_dir):
os.makedirs(path_dir)
except OSError as exc:
raise IOError(exc)
with open(path, 'w') as output_file:
for path, timestamp in list(processed_files.items()):
if not os.path.isabs(path):
raise TypeError('Only absolute paths are acceptable: %s' % path)
output_file.write('%s %i\n' % (path, timestamp))
class DescriptorReader(object):
"""
Iterator for the descriptor data on the local file system. This can process
text files, tarball archives (gzip or bzip2), or recurse directories.
By default this limits the number of descriptors that we'll read ahead before
waiting for our caller to fetch some of them. This is included to avoid
unbounded memory usage.
Our persistence_path argument is a convenient way to persist the listing
of files we have processed between runs, however it doesn't allow for error
handling. If you want that then use the
:func:`~stem.descriptor.reader.load_processed_files` and
:func:`~stem.descriptor.reader.save_processed_files` functions instead.
:param str,list target: path or list of paths for files or directories to be read from
:param bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:param bool follow_links: determines if we'll follow symlinks when traversing
directories (requires python 2.6)
:param int buffer_size: descriptors we'll buffer before waiting for some to
be read, this is unbounded if zero
:param str persistence_path: if set we will load and save processed file
listings from this path, errors are ignored
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:param dict kwargs: additional arguments for the descriptor constructor
"""
def __init__(self, target, validate = False, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
self._targets = [target] if stem.util._is_str(target) else target
# expand any relative paths we got
self._targets = list(map(os.path.abspath, self._targets))
self._validate = validate
self._follow_links = follow_links
self._persistence_path = persistence_path
self._document_handler = document_handler
self._kwargs = kwargs
self._read_listeners = []
self._skip_listeners = []
self._processed_files = {}
self._reader_thread = None
self._reader_thread_lock = threading.RLock()
self._iter_lock = threading.RLock()
self._iter_notice = threading.Event()
self._is_stopped = threading.Event()
self._is_stopped.set()
# Descriptors that we have read but not yet provided to the caller. A
# FINISHED entry is used by the reading thread to indicate the end.
self._unreturned_descriptors = queue.Queue(buffer_size)
if self._persistence_path:
try:
processed_files = load_processed_files(self._persistence_path)
self.set_processed_files(processed_files)
except:
pass
def get_processed_files(self):
"""
For each file that we have read descriptor data from this provides a
mapping of the form...
::
absolute path (str) => last modified unix timestamp (int)
This includes entries set through the
:func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
method. Each run resets this to only the files that were present during
that run.
:returns: **dict** with the absolute paths and unix timestamp for the last
modified times of the files we have processed
"""
# make sure that we only provide back absolute paths
return dict((os.path.abspath(k), v) for (k, v) in list(self._processed_files.items()))
def set_processed_files(self, processed_files):
"""
Sets the listing of the files we have processed. Most often this is used
with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
pre-populate the listing of descriptor files that we have seen.
:param dict processed_files: mapping of absolute paths (**str**) to unix
timestamps for the last modified time (**int**)
"""
self._processed_files = dict(processed_files)
def register_read_listener(self, listener):
"""
Registers a listener for when files are read. This is executed prior to
processing files. Listeners are expected to be of the form...
::
my_listener(path)
:param functor listener: functor to be notified when files are read
"""
self._read_listeners.append(listener)
def register_skip_listener(self, listener):
"""
Registers a listener for files that are skipped. This listener is expected
to be a functor of the form...
::
my_listener(path, exception)
:param functor listener: functor to be notified of files that are skipped
due to read errors or because they couldn't be parsed as valid descriptor data
"""
self._skip_listeners.append(listener)
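# Example (sketch): a skip listener that simply logs what was passed over...
#
#   def skip_listener(path, exception):
#     print('Skipped %s: %s' % (path, exception))
#
#   reader.register_skip_listener(skip_listener)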
def get_buffered_descriptor_count(self):
"""
Provides the number of descriptors that are waiting to be iterated over.
This is limited to the buffer_size that we were constructed with.
:returns: **int** for the estimated number of currently enqueued
descriptors, this is not entirely reliable
"""
return self._unreturned_descriptors.qsize()
def start(self):
"""
Starts reading our descriptor files.
:raises: **ValueError** if we're already reading the descriptor files
"""
with self._reader_thread_lock:
if self._reader_thread:
raise ValueError('Already running, you need to call stop() first')
else:
self._is_stopped.clear()
self._reader_thread = threading.Thread(target = self._read_descriptor_files, name='Descriptor reader')
self._reader_thread.setDaemon(True)
self._reader_thread.start()
def stop(self):
"""
Stops further reading of descriptor files.
"""
with self._reader_thread_lock:
self._is_stopped.set()
self._iter_notice.set()
# clears our queue to unblock enqueue calls
try:
while True:
self._unreturned_descriptors.get_nowait()
except queue.Empty:
pass
self._reader_thread.join()
self._reader_thread = None
if self._persistence_path:
try:
processed_files = self.get_processed_files()
save_processed_files(self._persistence_path, processed_files)
except:
pass
def _read_descriptor_files(self):
new_processed_files = {}
remaining_files = list(self._targets)
while remaining_files and not self._is_stopped.is_set():
target = remaining_files.pop(0)
if not os.path.exists(target):
self._notify_skip_listeners(target, FileMissing())
continue
if os.path.isdir(target):
walker = os.walk(target, followlinks = self._follow_links)
self._handle_walker(walker, new_processed_files)
else:
self._handle_file(target, new_processed_files)
self._processed_files = new_processed_files
if not self._is_stopped.is_set():
self._unreturned_descriptors.put(FINISHED)
self._iter_notice.set()
def __iter__(self):
with self._iter_lock:
while not self._is_stopped.is_set():
try:
descriptor = self._unreturned_descriptors.get_nowait()
if descriptor == FINISHED:
break
else:
yield descriptor
except queue.Empty:
self._iter_notice.wait()
self._iter_notice.clear()
def _handle_walker(self, walker, new_processed_files):
for root, _, files in walker:
for filename in files:
self._handle_file(os.path.join(root, filename), new_processed_files)
# this can take a while if, say, we're including the root directory
if self._is_stopped.is_set():
return
def _handle_file(self, target, new_processed_files):
# This is a file. Register its last modified timestamp and check if
# it's a file that we should skip.
try:
last_modified = int(os.stat(target).st_mtime)
last_used = self._processed_files.get(target)
new_processed_files[target] = last_modified
except OSError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
return
if last_used and last_used >= last_modified:
self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
return
# Block devices and such are never descriptors, and can cause us to block
# for quite a while, so we skip anything that isn't a regular file.
if not os.path.isfile(target):
return
# The mimetypes module only checks the file extension. To actually
# check the content (like the 'file' command) we'd need something like
# pymagic (https://github.com/cloudburst/pymagic).
target_type = mimetypes.guess_type(target)
if target_type[0] in (None, 'text/plain'):
# either '.txt' or an unknown type
self._handle_descriptor_file(target, target_type)
elif stem.util.system.is_tarfile(target):
# handles gzip, bz2, and decompressed tarballs among others
self._handle_archive(target)
else:
self._notify_skip_listeners(target, UnrecognizedType(target_type))
def _handle_descriptor_file(self, target, mime_type):
try:
self._notify_read_listeners(target)
with open(target, 'rb') as target_file:
for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
if self._is_stopped.is_set():
return
self._unreturned_descriptors.put(desc)
self._iter_notice.set()
except TypeError:
self._notify_skip_listeners(target, UnrecognizedType(mime_type))
except ValueError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
except IOError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
def _handle_archive(self, target):
# TODO: When dropping python 2.6 support go back to using 'with' for
# tarfiles...
#
# http://bugs.python.org/issue7232
tar_file = None
try:
self._notify_read_listeners(target)
tar_file = tarfile.open(target)
for tar_entry in tar_file:
if tar_entry.isfile():
entry = tar_file.extractfile(tar_entry)
try:
for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
if self._is_stopped.is_set():
return
desc._set_path(os.path.abspath(target))
desc._set_archive_path(tar_entry.name)
self._unreturned_descriptors.put(desc)
self._iter_notice.set()
except TypeError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
except ValueError as exc:
self._notify_skip_listeners(target, ParsingFailure(exc))
finally:
entry.close()
except IOError as exc:
self._notify_skip_listeners(target, ReadFailed(exc))
finally:
if tar_file:
tar_file.close()
def _notify_read_listeners(self, path):
for listener in self._read_listeners:
listener(path)
def _notify_skip_listeners(self, path, exception):
for listener in self._skip_listeners:
listener(path, exception)
def __enter__(self):
self.start()
return self
def __exit__(self, exit_type, value, traceback):
self.stop()

stem/descriptor/remote.py

@@ -0,0 +1,985 @@
# Copyright 2013-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Module for remotely retrieving descriptors from directory authorities and
mirrors. This is the simplest method for getting current tor descriptor
information...
::
import stem.descriptor.remote
for desc in stem.descriptor.remote.get_server_descriptors():
if desc.exit_policy.is_exiting_allowed():
print(' %s (%s)' % (desc.nickname, desc.fingerprint))
More custom downloading behavior can be done through the
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
:class:`~stem.descriptor.remote.Query` instances to get you descriptor
content. For example...
::
from stem.descriptor.remote import DescriptorDownloader
downloader = DescriptorDownloader(
use_mirrors = True,
timeout = 10,
)
query = downloader.get_server_descriptors()
print('Exit Relays:')
try:
for desc in query.run():
if desc.exit_policy.is_exiting_allowed():
print(' %s (%s)' % (desc.nickname, desc.fingerprint))
print('')
print('Query took %0.2f seconds' % query.runtime)
except Exception as exc:
print('Unable to retrieve the server descriptors: %s' % exc)
::
get_instance - Provides a singleton DescriptorDownloader used for...
|- their_server_descriptor - provides the server descriptor of the relay we download from
|- get_server_descriptors - provides present server descriptors
|- get_extrainfo_descriptors - provides present extrainfo descriptors
+- get_consensus - provides the present consensus or router status entries
Query - Asynchronous request to download tor descriptors
|- start - issues the query if it isn't already running
+- run - blocks until the request is finished and provides the results
DescriptorDownloader - Configurable class for issuing queries
|- use_directory_mirrors - use directory mirrors to download future descriptors
|- their_server_descriptor - provides the server descriptor of the relay we download from
|- get_server_descriptors - provides present server descriptors
|- get_extrainfo_descriptors - provides present extrainfo descriptors
|- get_consensus - provides the present consensus or router status entries
|- get_key_certificates - provides present authority key certificates
+- query - request an arbitrary descriptor resource
.. versionadded:: 1.1.0
.. data:: MAX_FINGERPRINTS
Maximum number of descriptors that can be requested at a time by their
fingerprints.
.. data:: MAX_MICRODESCRIPTOR_HASHES
Maximum number of microdescriptors that can be requested at a time by their
hashes.
.. data:: Compression (enum)
Compression when downloading descriptors.
.. versionadded:: 1.7.0
=============== ===========
Compression Description
=============== ===========
**PLAINTEXT** Uncompressed data.
**GZIP** `GZip compression <https://www.gnu.org/software/gzip/>`_.
**ZSTD** `Zstandard compression <https://www.zstd.net>`_, this requires the `zstandard module <https://pypi.python.org/pypi/zstandard>`_.
**LZMA** `LZMA compression <https://en.wikipedia.org/wiki/LZMA>`_, this requires the `lzma module <https://docs.python.org/3/library/lzma.html>`_.
=============== ===========
"""
import io
import random
import sys
import threading
import time
import zlib
import stem
import stem.client
import stem.descriptor
import stem.directory
import stem.prereq
import stem.util.enum
from stem.client.datatype import RelayCommand
from stem.util import log, str_tools
try:
# account for urllib's change between python 2.x and 3.x
import urllib.request as urllib
except ImportError:
import urllib2 as urllib
Compression = stem.util.enum.Enum(
('PLAINTEXT', 'identity'),
('GZIP', 'gzip'), # can also be 'deflate'
('ZSTD', 'x-zstd'),
('LZMA', 'x-tor-lzma'),
)
# Tor has a limited number of descriptors we can fetch explicitly by their
# fingerprint or hashes due to a limit on the url length by squid proxies.
MAX_FINGERPRINTS = 96
MAX_MICRODESCRIPTOR_HASHES = 90
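# An illustrative (non-API) pattern for staying under these limits when many
# descriptors are needed: chunk the fingerprints and issue several requests...
#
#   for i in range(0, len(fingerprints), MAX_FINGERPRINTS):
#     get_server_descriptors(fingerprints[i:i + MAX_FINGERPRINTS])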
SINGLETON_DOWNLOADER = None
def get_instance():
"""
Provides the singleton :class:`~stem.descriptor.remote.DescriptorDownloader`
used for the following functions...
* :func:`stem.descriptor.remote.get_server_descriptors`
* :func:`stem.descriptor.remote.get_extrainfo_descriptors`
* :func:`stem.descriptor.remote.get_consensus`
.. versionadded:: 1.5.0
:returns: singleton :class:`~stem.descriptor.remote.DescriptorDownloader` instance
"""
global SINGLETON_DOWNLOADER
if SINGLETON_DOWNLOADER is None:
SINGLETON_DOWNLOADER = DescriptorDownloader()
return SINGLETON_DOWNLOADER
def their_server_descriptor(**query_args):
"""
Provides the server descriptor of the relay we're downloading from.
.. versionadded:: 1.7.0
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
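For example (an illustrative sketch)...
::
  desc = their_server_descriptor().run()[0]
  print('downloaded from %s' % desc.nickname)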
"""
return get_instance().their_server_descriptor(**query_args)
def get_server_descriptors(fingerprints = None, **query_args):
"""
Shorthand for
:func:`~stem.descriptor.remote.DescriptorDownloader.get_server_descriptors`
on our singleton instance.
.. versionadded:: 1.5.0
"""
return get_instance().get_server_descriptors(fingerprints, **query_args)
def get_extrainfo_descriptors(fingerprints = None, **query_args):
"""
Shorthand for
:func:`~stem.descriptor.remote.DescriptorDownloader.get_extrainfo_descriptors`
on our singleton instance.
.. versionadded:: 1.5.0
"""
return get_instance().get_extrainfo_descriptors(fingerprints, **query_args)
def get_consensus(authority_v3ident = None, microdescriptor = False, **query_args):
"""
Shorthand for
:func:`~stem.descriptor.remote.DescriptorDownloader.get_consensus`
on our singleton instance.
.. versionadded:: 1.5.0
"""
return get_instance().get_consensus(authority_v3ident, microdescriptor, **query_args)
class Query(object):
"""
Asynchronous request for descriptor content from a directory authority or
mirror. These can either be made through the
:class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
advanced usage.
To block on the response and get results either call
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
:func:`~stem.descriptor.remote.Query.run` method passes along any errors that
arise...
::
from stem.descriptor.remote import Query
query = Query(
'/tor/server/all',
block = True,
timeout = 30,
)
print('Current relays:')
if not query.error:
for desc in query:
print(desc.fingerprint)
else:
print('Unable to retrieve the server descriptors: %s' % query.error)
... while iterating fails silently...
::
print('Current relays:')
for desc in Query('/tor/server/all', 'server-descriptor 1.0'):
print(desc.fingerprint)
In either case exceptions are available via our 'error' attribute.
Tor provides quite a few different descriptor resources via its directory
protocol (see section 4.2 and later of the `dir-spec
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_).
Commonly useful ones include...
=============================================== ===========
Resource Description
=============================================== ===========
/tor/server/all all present server descriptors
/tor/server/fp/<fp1>+<fp2>+<fp3> server descriptors with the given fingerprints
/tor/extra/all all present extrainfo descriptors
/tor/extra/fp/<fp1>+<fp2>+<fp3> extrainfo descriptors with the given fingerprints
/tor/micro/d/<hash1>-<hash2> microdescriptors with the given hashes
/tor/status-vote/current/consensus present consensus
/tor/status-vote/current/consensus-microdesc present microdescriptor consensus
/tor/keys/all key certificates for the authorities
/tor/keys/fp/<v3ident1>+<v3ident2> key certificates for specific authorities
=============================================== ===========
**ZSTD** compression requires `zstandard
<https://pypi.python.org/pypi/zstandard>`_, and **LZMA** requires the `lzma
module <https://docs.python.org/3/library/lzma.html>`_.
For legacy reasons if our resource has a '.z' suffix then our **compression**
argument is overwritten with Compression.GZIP.
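For example, a request that prefers zstd but can fall back to plaintext if
the zstandard module is unavailable (an illustrative sketch)...
::
  from stem.descriptor.remote import Query, Compression
  query = Query(
    '/tor/status-vote/current/consensus',
    compression = [Compression.ZSTD, Compression.PLAINTEXT],
  )
  for router in query.run():
    print(router.nickname)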
.. versionchanged:: 1.7.0
Added support for downloading from ORPorts.
.. versionchanged:: 1.7.0
Added the compression argument.
.. versionchanged:: 1.7.0
Added the reply_headers attribute.
The class this provides changed between Python versions. In python2
this was called httplib.HTTPMessage, whereas in python3 the class was
renamed to http.client.HTTPMessage.
.. versionchanged:: 1.7.0
Endpoints are now expected to be :class:`~stem.DirPort` or
:class:`~stem.ORPort` instances. Usage of tuples for this
argument is deprecated and will be removed in the future.
.. versionchanged:: 1.7.0
Avoid downloading from tor26. This directory authority throttles its
DirPort to such an extent that requests either time out or take on the
order of minutes.
.. versionchanged:: 1.7.0
Avoid downloading from Bifroest. This is the bridge authority so it
doesn't vote in the consensus, and apparently times out frequently.
:var str resource: resource being fetched, such as '/tor/server/all'
:var str descriptor_type: type of descriptors being fetched (for options see
:func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
resource if **None**
:var list endpoints: :class:`~stem.DirPort` or :class:`~stem.ORPort` of the
authority or mirror we're querying, this uses authorities if undefined
:var list compression: list of :data:`stem.descriptor.remote.Compression`
we're willing to accept, when none are mutually supported downloads fall
back to Compression.PLAINTEXT
:var int retries: number of times to attempt the request if downloading it
fails
:var bool fall_back_to_authority: when retrying, issues the final request to
a directory authority if **True**
:var str content: downloaded descriptor content
:var Exception error: exception if a problem occurred
:var bool is_done: flag that indicates if our request has finished
:var float start_time: unix timestamp when we first started running
:var http.client.HTTPMessage reply_headers: headers provided in the response,
**None** if we haven't yet made our request
:var float runtime: time our query took, this is **None** if it's not yet
finished
:var bool validate: checks the validity of the descriptor's content if
**True**, skips these checks otherwise
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
:var dict kwargs: additional arguments for the descriptor constructor
Following are only applicable when downloading from a
:class:`~stem.DirPort`...
:var float timeout: duration before we'll time out our request
:var str download_url: last url used to download the descriptor, this is
unset until we've actually made a download attempt
:param bool start: start making the request when constructed (default is **True**)
:param bool block: only return after the request has been completed, this is
the same as running **query.run(True)** (default is **False**)
"""
def __init__(self, resource, descriptor_type = None, endpoints = None, compression = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
if not resource.startswith('/'):
raise ValueError("Resources should start with a '/': %s" % resource)
if resource.endswith('.z'):
compression = [Compression.GZIP]
resource = resource[:-2]
elif compression is None:
compression = [Compression.PLAINTEXT]
else:
if isinstance(compression, str):
compression = [compression] # caller provided only a single option
if Compression.ZSTD in compression and not stem.prereq.is_zstd_available():
compression.remove(Compression.ZSTD)
if Compression.LZMA in compression and not stem.prereq.is_lzma_available():
compression.remove(Compression.LZMA)
if not compression:
compression = [Compression.PLAINTEXT]
if descriptor_type:
self.descriptor_type = descriptor_type
else:
self.descriptor_type = _guess_descriptor_type(resource)
self.endpoints = []
if endpoints:
for endpoint in endpoints:
if isinstance(endpoint, tuple) and len(endpoint) == 2:
self.endpoints.append(stem.DirPort(endpoint[0], endpoint[1])) # TODO: remove this in stem 2.0
elif isinstance(endpoint, (stem.ORPort, stem.DirPort)):
self.endpoints.append(endpoint)
else:
raise ValueError("Endpoints must be an stem.ORPort, stem.DirPort, or two value tuple. '%s' is a %s." % (endpoint, type(endpoint).__name__))
self.resource = resource
self.compression = compression
self.retries = retries
self.fall_back_to_authority = fall_back_to_authority
self.content = None
self.error = None
self.is_done = False
self.download_url = None
self.start_time = None
self.timeout = timeout
self.runtime = None
self.validate = validate
self.document_handler = document_handler
self.reply_headers = None
self.kwargs = kwargs
self._downloader_thread = None
self._downloader_thread_lock = threading.RLock()
if start:
self.start()
if block:
self.run(True)
def start(self):
"""
Starts downloading the descriptors if we haven't started already.
"""
with self._downloader_thread_lock:
if self._downloader_thread is None:
self._downloader_thread = threading.Thread(
name = 'Descriptor query',
target = self._download_descriptors,
args = (self.retries, self.timeout)
)
self._downloader_thread.setDaemon(True)
self._downloader_thread.start()
def run(self, suppress = False):
"""
Blocks until our request is complete then provides the descriptors. If we
haven't yet started our request then this does so.
:param bool suppress: avoids raising exceptions if **True**
:returns: list for the requested :class:`~stem.descriptor.__init__.Descriptor` instances
:raises:
Using the iterator can fail with the following if **suppress** is
**False**...
* **ValueError** if the descriptor contents is malformed
* **socket.timeout** if our request timed out
* **urllib2.URLError** for most request failures
Note that the urllib2 module may fail with other exception types, in
which case we'll pass it along.
"""
return list(self._run(suppress))
def _run(self, suppress):
with self._downloader_thread_lock:
self.start()
self._downloader_thread.join()
if self.error:
if suppress:
return
raise self.error
else:
if self.content is None:
if suppress:
return
raise ValueError('BUG: _download_descriptors() finished without either results or an error')
try:
results = stem.descriptor.parse_file(
io.BytesIO(self.content),
self.descriptor_type,
validate = self.validate,
document_handler = self.document_handler,
**self.kwargs
)
for desc in results:
yield desc
except ValueError as exc:
self.error = exc # encountered a parsing error
if suppress:
return
raise self.error
def __iter__(self):
for desc in self._run(True):
yield desc
def _pick_endpoint(self, use_authority = False):
"""
Provides an endpoint to query. If we have multiple endpoints then one
is picked at random.
:param bool use_authority: ignores our endpoints and uses a directory
authority instead
:returns: **str** for the url being queried by this request
"""
if use_authority or not self.endpoints:
picked = random.choice([auth for auth in stem.directory.Authority.from_cache().values() if auth.nickname not in ('tor26', 'Bifroest')])
return stem.DirPort(picked.address, picked.dir_port)
else:
return random.choice(self.endpoints)
def _download_descriptors(self, retries, timeout):
try:
self.start_time = time.time()
endpoint = self._pick_endpoint(use_authority = retries == 0 and self.fall_back_to_authority)
if isinstance(endpoint, stem.ORPort):
self.content, self.reply_headers = _download_from_orport(endpoint, self.compression, self.resource)
elif isinstance(endpoint, stem.DirPort):
self.download_url = 'http://%s:%i/%s' % (endpoint.address, endpoint.port, self.resource.lstrip('/'))
self.content, self.reply_headers = _download_from_dirport(self.download_url, self.compression, timeout)
else:
raise ValueError("BUG: endpoints can only be ORPorts or DirPorts, '%s' was a %s" % (endpoint, type(endpoint).__name__))
self.runtime = time.time() - self.start_time
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
except:
exc = sys.exc_info()[1]
if timeout is not None:
timeout -= time.time() - self.start_time
if retries > 0 and (timeout is None or timeout > 0):
log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
return self._download_descriptors(retries - 1, timeout)
else:
log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
self.error = exc
finally:
self.is_done = True
class DescriptorDownloader(object):
"""
Configurable class that issues :class:`~stem.descriptor.remote.Query`
instances on your behalf.
:param bool use_mirrors: downloads the present consensus and uses the directory
mirrors to fetch future requests, this fails silently if the consensus
cannot be downloaded
:param default_args: default arguments for the
:class:`~stem.descriptor.remote.Query` constructor
"""
def __init__(self, use_mirrors = False, **default_args):
self._default_args = default_args
directories = list(stem.directory.Authority.from_cache().values())
self._endpoints = [(directory.address, directory.dir_port) for directory in directories]
if use_mirrors:
try:
start_time = time.time()
self.use_directory_mirrors()
log.debug('Retrieved directory mirrors (took %0.2fs)' % (time.time() - start_time))
except Exception as exc:
log.debug('Unable to retrieve directory mirrors: %s' % exc)
def use_directory_mirrors(self):
"""
Downloads the present consensus and configures ourselves to use directory
mirrors, in addition to authorities.
:returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
from which we got the directory mirrors
:raises: **Exception** if unable to determine the directory mirrors
"""
directories = stem.directory.Authority.from_cache().values()
new_endpoints = set([(directory.address, directory.dir_port) for directory in directories])
consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]
for desc in consensus.routers.values():
if stem.Flag.V2DIR in desc.flags:
new_endpoints.add((desc.address, desc.dir_port))
# we need our endpoints to be a list rather than set for random.choice()
self._endpoints = list(new_endpoints)
return consensus
def their_server_descriptor(self, **query_args):
"""
Provides the server descriptor of the relay we're downloading from.
.. versionadded:: 1.7.0
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
"""
return self.query('/tor/server/authority', **query_args)
def get_server_descriptors(self, fingerprints = None, **query_args):
"""
Provides the server descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors known
by the relay.
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
:raises: **ValueError** if we request more than 96 descriptors by their
fingerprints (this is due to a limit on the url length by squid proxies).
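For example (an illustrative sketch; the fingerprint shown is moria1's at
the time of writing)...
::
  downloader = DescriptorDownloader()
  desc = downloader.get_server_descriptors('9695DFC35FFEB861329B9F1AB04C46397020CE31').run()[0]
  print('%s (%s)' % (desc.nickname, desc.address))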
"""
resource = '/tor/server/all'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
if fingerprints:
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
resource = '/tor/server/fp/%s' % '+'.join(fingerprints)
return self.query(resource, **query_args)
def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
"""
Provides the extrainfo descriptors with the given fingerprints. If no
fingerprints are provided then this returns all descriptors in the present
consensus.
:param str,list fingerprints: fingerprint or list of fingerprints to be
retrieved, gets all descriptors if **None**
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
:raises: **ValueError** if we request more than 96 descriptors by their
fingerprints (this is due to a limit on the url length by squid proxies).
"""
resource = '/tor/extra/all'
if isinstance(fingerprints, str):
fingerprints = [fingerprints]
if fingerprints:
if len(fingerprints) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
resource = '/tor/extra/fp/%s' % '+'.join(fingerprints)
return self.query(resource, **query_args)
# TODO: drop in stem 2.x
def get_microdescriptors(self, hashes, **query_args):
"""
Provides the microdescriptors with the given hashes. To get these see the
'microdescriptor_hashes' attribute of
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
that these are only provided via a microdescriptor consensus (such as
'cached-microdesc-consensus' in your data directory).
.. deprecated:: 1.5.0
This function has never worked, as it was never implemented in tor
(:trac:`9271`).
:param str,list hashes: microdescriptor hash or list of hashes to be
retrieved
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
:raises: **ValueError** if we request more than 90 microdescriptors by their
hashes (this is due to a limit on the url length by squid proxies).
"""
if isinstance(hashes, str):
hashes = [hashes]
if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
raise ValueError('Unable to request more than %i microdescriptors at a time by their hashes' % MAX_MICRODESCRIPTOR_HASHES)
return self.query('/tor/micro/d/%s' % '-'.join(hashes), **query_args)
def get_consensus(self, authority_v3ident = None, microdescriptor = False, **query_args):
"""
Provides the present router status entries.
.. versionchanged:: 1.5.0
Added the microdescriptor argument.
:param str authority_v3ident: fingerprint of the authority key for which
to get the consensus, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/tree/src/or/config.c>`_
for the values.
:param bool microdescriptor: provides the microdescriptor consensus if
**True**, standard consensus otherwise
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
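For example (an illustrative sketch)...
::
  downloader = DescriptorDownloader()
  for entry in downloader.get_consensus().run():
    print('%s (%s)' % (entry.nickname, entry.fingerprint))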
"""
if microdescriptor:
resource = '/tor/status-vote/current/consensus-microdesc'
else:
resource = '/tor/status-vote/current/consensus'
if authority_v3ident:
resource += '/%s' % authority_v3ident
consensus_query = self.query(resource, **query_args)
# if we're performing validation then check that it's signed by the
# authority key certificates
if consensus_query.validate and consensus_query.document_handler == stem.descriptor.DocumentHandler.DOCUMENT and stem.prereq.is_crypto_available():
consensus = list(consensus_query.run())[0]
key_certs = self.get_key_certificates(**query_args).run()
consensus.validate_signatures(key_certs)
return consensus_query
def get_vote(self, authority, **query_args):
"""
Provides the present vote for a given directory authority.
:param stem.directory.Authority authority: authority for which to retrieve a vote for
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the router status
entries
"""
resource = '/tor/status-vote/current/authority'
if 'endpoints' not in query_args:
query_args['endpoints'] = [(authority.address, authority.dir_port)]
return self.query(resource, **query_args)
def get_key_certificates(self, authority_v3idents = None, **query_args):
"""
Provides the key certificates for authorities with the given fingerprints.
If no fingerprints are provided then this returns all present key
certificates.
:param str authority_v3idents: fingerprint or list of fingerprints of the
authority keys, see `'v3ident' in tor's config.c
<https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_
for the values.
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the key certificates
:raises: **ValueError** if we request more than 96 key certificates by
their identity fingerprints (this is due to a limit on the url length by
squid proxies).
"""
resource = '/tor/keys/all'
if isinstance(authority_v3idents, str):
authority_v3idents = [authority_v3idents]
if authority_v3idents:
if len(authority_v3idents) > MAX_FINGERPRINTS:
raise ValueError('Unable to request more than %i key certificates at a time by their identity fingerprints' % MAX_FINGERPRINTS)
resource = '/tor/keys/fp/%s' % '+'.join(authority_v3idents)
return self.query(resource, **query_args)
def query(self, resource, **query_args):
"""
Issues a request for the given resource.
.. versionchanged:: 1.7.0
The **fall_back_to_authority** default when using this method is now
**False**, like the :class:`~stem.descriptor.remote.Query` class.
:param str resource: resource being fetched, such as '/tor/server/all'
:param query_args: additional arguments for the
:class:`~stem.descriptor.remote.Query` constructor
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
:raises: **ValueError** if resource is clearly invalid or the descriptor
type can't be determined when 'descriptor_type' is **None**
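For example, fetching an arbitrary resource (an illustrative sketch)...
::
  downloader = DescriptorDownloader()
  for desc in downloader.query('/tor/server/all').run():
    print(desc.fingerprint)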
"""
args = dict(self._default_args)
args.update(query_args)
if 'endpoints' not in args:
args['endpoints'] = self._endpoints
return Query(resource, **args)
def _download_from_orport(endpoint, compression, resource):
"""
Downloads descriptors from the given orport. Payload is just like an http
response (headers and all)...
::
HTTP/1.0 200 OK
Date: Mon, 23 Apr 2018 18:43:47 GMT
Content-Type: text/plain
X-Your-Address-Is: 216.161.254.25
Content-Encoding: identity
Expires: Wed, 25 Apr 2018 18:43:47 GMT
router dannenberg 193.23.244.244 443 0 80
identity-ed25519
... rest of the descriptor content...
:param stem.ORPort endpoint: endpoint to download from
:param list compression: compression methods for the request
:param str resource: descriptor resource to download
:returns: two value tuple of the form (data, reply_headers)
:raises:
* :class:`stem.ProtocolError` if not a valid descriptor response
* :class:`stem.SocketError` if unable to establish a connection
"""
link_protocols = endpoint.link_protocols if endpoint.link_protocols else [3]
with stem.client.Relay.connect(endpoint.address, endpoint.port, link_protocols) as relay:
with relay.create_circuit() as circ:
request = '\r\n'.join((
'GET %s HTTP/1.0' % resource,
'Accept-Encoding: %s' % ', '.join(compression),
'User-Agent: %s' % stem.USER_AGENT,
)) + '\r\n\r\n'
circ.send(RelayCommand.BEGIN_DIR, stream_id = 1)
response = b''.join([cell.data for cell in circ.send(RelayCommand.DATA, request, stream_id = 1)])
first_line, data = response.split(b'\r\n', 1)
header_data, body_data = data.split(b'\r\n\r\n', 1)
if not first_line.startswith(b'HTTP/1.0 2'):
raise stem.ProtocolError("Response should begin with HTTP success, but was '%s'" % str_tools._to_unicode(first_line))
headers = {}
for line in str_tools._to_unicode(header_data).splitlines():
if ': ' not in line:
raise stem.ProtocolError("'%s' is not a HTTP header:\n\n%s" % line)
key, value = line.split(': ', 1)
headers[key] = value
return _decompress(body_data, headers.get('Content-Encoding')), headers
def _download_from_dirport(url, compression, timeout):
"""
Downloads descriptors from the given url.
:param str url: dirport url from which to download
:param list compression: compression methods for the request
:param float timeout: duration before we'll time out our request
:returns: two value tuple of the form (data, reply_headers)
:raises:
* **socket.timeout** if our request timed out
* **urllib2.URLError** for most request failures
"""
response = urllib.urlopen(
urllib.Request(
url,
headers = {
'Accept-Encoding': ', '.join(compression),
'User-Agent': stem.USER_AGENT,
}
),
timeout = timeout,
)
return _decompress(response.read(), response.headers.get('Content-Encoding')), response.headers
def _decompress(data, encoding):
"""
Decompresses descriptor data.
Tor doesn't include compression headers. As such when using gzip we
need to add 32 to zlib's window size (zlib.MAX_WBITS | 32) for automatic
header detection...
https://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
... and with zstd we need to use the streaming API.
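For instance, zlib encoded data round-trips as follows (illustrative)...
::
  >>> _decompress(zlib.compress(b'descriptor content'), 'deflate')
  b'descriptor content'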
:param bytes data: data we received
:param str encoding: 'Content-Encoding' header of the response
:returns: **bytes** with the decompressed data
:raises:
* **ValueError** if encoding is unrecognized
* **ImportError** if missing the decompression module
"""
if encoding == Compression.PLAINTEXT:
return data.strip()
elif encoding in (Compression.GZIP, 'deflate'):
return zlib.decompress(data, zlib.MAX_WBITS | 32).strip()
elif encoding == Compression.ZSTD:
if not stem.prereq.is_zstd_available():
raise ImportError('Decompressing zstd data requires https://pypi.python.org/pypi/zstandard')
import zstd
output_buffer = io.BytesIO()
with zstd.ZstdDecompressor().write_to(output_buffer) as decompressor:
decompressor.write(data)
return output_buffer.getvalue().strip()
elif encoding == Compression.LZMA:
if not stem.prereq.is_lzma_available():
raise ImportError('Decompressing lzma data requires https://docs.python.org/3/library/lzma.html')
import lzma
return lzma.decompress(data).strip()
else:
raise ValueError("'%s' isn't a recognized type of encoding" % encoding)
def _guess_descriptor_type(resource):
# Attempts to determine the descriptor type based on the resource url. This
# raises a ValueError if the resource isn't recognized.
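#
# For example, '/tor/server/fp/<fingerprint>' maps to 'server-descriptor 1.0'
# and '/tor/status-vote/current/consensus' to 'network-status-consensus-3 1.0'.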
if resource.startswith('/tor/server/'):
return 'server-descriptor 1.0'
elif resource.startswith('/tor/extra/'):
return 'extra-info 1.0'
elif resource.startswith('/tor/micro/'):
return 'microdescriptor 1.0'
elif resource.startswith('/tor/status-vote/current/consensus-microdesc'):
return 'network-status-microdesc-consensus-3 1.0'
elif resource.startswith('/tor/status-vote/'):
return 'network-status-consensus-3 1.0'
elif resource.startswith('/tor/keys/'):
return 'dir-key-certificate-3 1.0'
else:
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
def get_authorities():
"""
Provides cached Tor directory authority information. The directory
information is hardcoded into Tor and occasionally changes, so the information
this provides might not necessarily match your version of tor.
.. deprecated:: 1.7.0
Use stem.directory.Authority.from_cache() instead.
:returns: **dict** of **str** nicknames to :class:`~stem.directory.Authority` instances
"""
return DirectoryAuthority.from_cache()
# TODO: drop aliases in stem 2.0
Directory = stem.directory.Directory
DirectoryAuthority = stem.directory.Authority
FallbackDirectory = stem.directory.Fallback

@ -0,0 +1,744 @@
# Copyright 2012-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for router status entries, the information for individual routers
within a network status document. This information is provided from a few
sources...
* control port via 'GETINFO ns/\*' and 'GETINFO md/\*' queries
* router entries in a network status document, like the cached-consensus
**Module Overview:**
::
RouterStatusEntry - Common parent for router status entries
|- RouterStatusEntryV2 - Entry for a network status v2 document
|- RouterStatusEntryV3 - Entry for a network status v3 document
+- RouterStatusEntryMicroV3 - Entry for a microdescriptor flavored v3 document
"""
import base64
import binascii
import stem.exit_policy
import stem.prereq
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
import stem.version
from stem.descriptor import (
KEYWORD_LINE,
Descriptor,
_descriptor_content,
_value,
_values,
_descriptor_components,
_parse_protocol_line,
_read_until_keywords,
_random_nickname,
_random_ipv4_address,
_random_date,
)
_parse_pr_line = _parse_protocol_line('pr', 'protocols')
def _parse_file(document_file, validate, entry_class, entry_keyword = 'r', start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
"""
Reads a range of the document_file containing some number of entry_class
instances. We delimit the entry_class entries by the keyword on their
first line (entry_keyword). When finished the document is left at the
end_position.
Either an end_position or section_end_keywords must be provided.
:param file document_file: file with network status document content
:param bool validate: checks the validity of the document's contents if
**True**, skips these checks otherwise
:param class entry_class: class to construct instance for
:param str entry_keyword: first keyword for the entry instances
:param int start_position: start of the section, default is the current position
:param int end_position: end of the section
:param tuple section_end_keywords: keyword(s) that delimit the end of the
section if no end_position was provided
:param tuple extra_args: extra arguments for the entry_class (after the
content and validate flag)
:returns: iterator over entry_class instances
:raises:
* **ValueError** if the contents is malformed and validate is **True**
* **IOError** if the file can't be read
"""
if start_position:
document_file.seek(start_position)
else:
start_position = document_file.tell()
# check if we're starting at the end of the section (ie, there's no entries to read)
if section_end_keywords:
first_keyword = None
line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
if line_match:
first_keyword = line_match.groups()[0]
document_file.seek(start_position)
if first_keyword in section_end_keywords:
return
while end_position is None or document_file.tell() < end_position:
desc_lines, ending_keyword = _read_until_keywords(
(entry_keyword,) + section_end_keywords,
document_file,
ignore_first = True,
end_position = end_position,
include_ending_keyword = True
)
desc_content = bytes.join(b'', desc_lines)
if desc_content:
yield entry_class(desc_content, validate, *extra_args)
# check if we stopped at the end of the section
if ending_keyword in section_end_keywords:
break
else:
break
def _parse_r_line(descriptor, entries):
# Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
# all current entry types (v2, v3, and microdescriptor v3) with one little
# wrinkle: only the microdescriptor flavor excludes a 'digest' field.
#
# For v2 and v3 router status entries:
# "r" nickname identity digest publication IP ORPort DirPort
# example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
#
# For v3 microdescriptor router status entries:
# "r" nickname identity publication IP ORPort DirPort
# example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
value = _value('r', entries)
include_digest = not isinstance(descriptor, RouterStatusEntryMicroV3)
r_comp = value.split(' ')
# inject a None for the digest to normalize the field positioning
if not include_digest:
r_comp.insert(2, None)
if len(r_comp) < 8:
expected_field_count = 'eight' if include_digest else 'seven'
raise ValueError("%s 'r' line must have %s values: r %s" % (descriptor._name(), expected_field_count, value))
if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
raise ValueError("%s nickname isn't valid: %s" % (descriptor._name(), r_comp[0]))
elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
raise ValueError("%s address isn't a valid IPv4 address: %s" % (descriptor._name(), r_comp[5]))
elif not stem.util.connection.is_valid_port(r_comp[6]):
raise ValueError('%s ORPort is invalid: %s' % (descriptor._name(), r_comp[6]))
elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
raise ValueError('%s DirPort is invalid: %s' % (descriptor._name(), r_comp[7]))
descriptor.nickname = r_comp[0]
descriptor.fingerprint = _base64_to_hex(r_comp[1])
if include_digest:
descriptor.digest = _base64_to_hex(r_comp[2])
descriptor.address = r_comp[5]
descriptor.or_port = int(r_comp[6])
descriptor.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
try:
published = '%s %s' % (r_comp[3], r_comp[4])
descriptor.published = stem.util.str_tools._parse_timestamp(published)
except ValueError:
raise ValueError("Publication time time wasn't parsable: r %s" % value)
def _parse_a_line(descriptor, entries):
# "a" SP address ":" portlist
# example: a [2001:888:2133:0:82:94:251:204]:9001
or_addresses = []
for value in _values('a', entries):
if ':' not in value:
raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (descriptor._name(), value))
address, port = value.rsplit(':', 1)
if not stem.util.connection.is_valid_ipv4_address(address) and not stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True):
raise ValueError("%s 'a' line must start with an IPv6 address: a %s" % (descriptor._name(), value))
if stem.util.connection.is_valid_port(port):
or_addresses.append((address.lstrip('[').rstrip(']'), int(port), stem.util.connection.is_valid_ipv6_address(address, allow_brackets = True)))
else:
raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (descriptor._name(), port, value))
descriptor.or_addresses = or_addresses
def _parse_s_line(descriptor, entries):
# "s" Flags
# example: s Named Running Stable Valid
value = _value('s', entries)
flags = [] if value == '' else value.split(' ')
descriptor.flags = flags
for flag in flags:
if flags.count(flag) > 1:
raise ValueError('%s had duplicate flags: s %s' % (descriptor._name(), value))
elif flag == '':
raise ValueError("%s had extra whitespace on its 's' line: s %s" % (descriptor._name(), value))
def _parse_v_line(descriptor, entries):
# "v" version
# example: v Tor 0.2.2.35
#
# The spec says that if this starts with "Tor " then what follows is a
# tor version. If not then it has "upgraded to a more sophisticated
# protocol versioning system".
value = _value('v', entries)
descriptor.version_line = value
if value.startswith('Tor '):
try:
descriptor.version = stem.version._get_version(value[4:])
except ValueError as exc:
raise ValueError('%s has a malformed tor version (%s): v %s' % (descriptor._name(), exc, value))
def _parse_w_line(descriptor, entries):
# "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
# example: w Bandwidth=7980
value = _value('w', entries)
w_comp = value.split(' ')
if len(w_comp) < 1:
raise ValueError("%s 'w' line is blank: w %s" % (descriptor._name(), value))
elif not w_comp[0].startswith('Bandwidth='):
raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (descriptor._name(), value))
bandwidth = None
measured = None
is_unmeasured = False
unrecognized_bandwidth_entries = []
for w_entry in w_comp:
if '=' in w_entry:
w_key, w_value = w_entry.split('=', 1)
else:
w_key, w_value = w_entry, None
if w_key == 'Bandwidth':
if not (w_value and w_value.isdigit()):
raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
bandwidth = int(w_value)
elif w_key == 'Measured':
if not (w_value and w_value.isdigit()):
raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
measured = int(w_value)
elif w_key == 'Unmeasured':
if w_value != '1':
raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (descriptor._name(), value))
is_unmeasured = True
else:
unrecognized_bandwidth_entries.append(w_entry)
descriptor.bandwidth = bandwidth
descriptor.measured = measured
descriptor.is_unmeasured = is_unmeasured
descriptor.unrecognized_bandwidth_entries = unrecognized_bandwidth_entries
def _parse_p_line(descriptor, entries):
# "p" ("accept" / "reject") PortList
#
# examples:
#
# p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
# p reject 1-65535
value = _value('p', entries)
try:
descriptor.exit_policy = stem.exit_policy.MicroExitPolicy(value)
except ValueError as exc:
raise ValueError('%s exit policy is malformed (%s): p %s' % (descriptor._name(), exc, value))
def _parse_id_line(descriptor, entries):
# "id" "ed25519" ed25519-identity
#
# examples:
#
# id ed25519 none
# id ed25519 8RH34kO07Pp+XYwzdoATVyCibIvmbslUjRkAm7J4IA8
value = _value('id', entries)
if value:
if descriptor.document and not descriptor.document.is_vote:
raise ValueError("%s 'id' line should only appear in votes: id %s" % (descriptor._name(), value))
value_comp = value.split()
if len(value_comp) >= 2:
descriptor.identifier_type = value_comp[0]
descriptor.identifier = value_comp[1]
else:
raise ValueError("'id' lines should contain both the key type and digest: id %s" % value)
def _parse_m_line(descriptor, entries):
# "m" methods 1*(algorithm "=" digest)
# example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
all_hashes = []
for value in _values('m', entries):
m_comp = value.split(' ')
if not (descriptor.document and descriptor.document.is_vote):
vote_status = 'consensus' if descriptor.document else '<undefined document>'
raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (descriptor._name(), vote_status, value))
elif len(m_comp) < 1:
raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (descriptor._name(), value))
try:
methods = [int(entry) for entry in m_comp[0].split(',')]
except ValueError:
raise ValueError('%s microdescriptor methods should be a series of comma separated integers: m %s' % (descriptor._name(), value))
hashes = {}
for entry in m_comp[1:]:
if '=' not in entry:
raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (descriptor._name(), value))
hash_name, digest = entry.split('=', 1)
hashes[hash_name] = digest
all_hashes.append((methods, hashes))
descriptor.microdescriptor_hashes = all_hashes
def _parse_microdescriptor_m_line(descriptor, entries):
# "m" digest
# example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
descriptor.digest = _base64_to_hex(_value('m', entries), check_if_fingerprint = False)
def _base64_to_hex(identity, check_if_fingerprint = True):
"""
Decodes a base64 value to hex. For example...
::
>>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
:param str identity: encoded fingerprint from the consensus
:param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
:returns: **str** with the uppercase hex encoding of the relay's fingerprint
:raises: **ValueError** if the result isn't a valid fingerprint
"""
# trailing equal signs were stripped from the identity
if len(identity) % 4 != 0:
  identity += '=' * (4 - len(identity) % 4)
try:
identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
except (TypeError, binascii.Error):
raise ValueError("Unable to decode identity string '%s'" % identity)
fingerprint = binascii.hexlify(identity_decoded).upper()
if stem.prereq.is_python_3():
fingerprint = stem.util.str_tools._to_unicode(fingerprint)
if check_if_fingerprint:
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
return fingerprint
class RouterStatusEntry(Descriptor):
"""
Information about an individual router stored within a network status
document. This is the common parent for concrete status entry types.
:var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from
:var str nickname: **\*** router's nickname
:var str fingerprint: **\*** router's fingerprint
:var datetime published: **\*** router's publication
:var str address: **\*** router's IP address
:var int or_port: **\*** router's ORPort
:var int dir_port: **\*** router's DirPort
:var list flags: **\*** list of :data:`~stem.Flag` associated with the relay
:var stem.version.Version version: parsed version of tor, this is **None** if
the relay's using a new versioning scheme
:var str version_line: versioning information reported by the relay
"""
ATTRIBUTES = {
'nickname': (None, _parse_r_line),
'fingerprint': (None, _parse_r_line),
'published': (None, _parse_r_line),
'address': (None, _parse_r_line),
'or_port': (None, _parse_r_line),
'dir_port': (None, _parse_r_line),
'flags': (None, _parse_s_line),
'version_line': (None, _parse_v_line),
'version': (None, _parse_v_line),
}
PARSER_FOR_LINE = {
'r': _parse_r_line,
's': _parse_s_line,
'v': _parse_v_line,
}
def __init__(self, content, validate = False, document = None):
"""
Parse a router descriptor in a network status document.
:param str content: router descriptor content to be parsed
:param NetworkStatusDocument document: document this descriptor came from
:param bool validate: checks the validity of the content if **True**, skips
these checks otherwise
:raises: **ValueError** if the descriptor data is invalid
"""
super(RouterStatusEntry, self).__init__(content, lazy_load = not validate)
self.document = document
entries = _descriptor_components(content, validate)
if validate:
for keyword in self._required_fields():
if keyword not in entries:
raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
for keyword in self._single_fields():
if keyword in entries and len(entries[keyword]) > 1:
raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
if 'r' != list(entries.keys())[0]:
raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
self._parse(entries, validate)
else:
self._entries = entries
def _name(self, is_plural = False):
"""
Name for this descriptor type.
"""
return 'Router status entries' if is_plural else 'Router status entry'
def _required_fields(self):
"""
Provides lines that must appear in the descriptor.
"""
return ()
def _single_fields(self):
"""
Provides lines that can only appear in the descriptor once.
"""
return ()
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntry):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __ne__(self, other):
return not self == other
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class RouterStatusEntryV2(RouterStatusEntry):
"""
Information about an individual router stored within a version 2 network
status document.
:var str digest: **\*** router's upper-case hex digest
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
'digest': (None, _parse_r_line),
})
@classmethod
def content(cls, attr = None, exclude = (), sign = False):
if sign:
raise NotImplementedError('Signing of %s not implemented' % cls.__name__)
return _descriptor_content(attr, exclude, (
('r', '%s p1aag7VwarGxqctS7/fS0y5FU+s oQZFLYe9e4A7bOkWKR7TaNxb0JE %s %s 9001 0' % (_random_nickname(), _random_date(), _random_ipv4_address())),
))
def _name(self, is_plural = False):
return 'Router status entries (v2)' if is_plural else 'Router status entry (v2)'
def _required_fields(self):
return ('r',)
def _single_fields(self):
return ('r', 's', 'v')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryV2):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __ne__(self, other):
return not self == other
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class RouterStatusEntryV3(RouterStatusEntry):
"""
Information about an individual router stored within a version 3 network
status document.
:var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
:var str identifier_type: identity digest key type
:var str identifier: base64 encoded identity digest
:var str digest: **\*** router's upper-case hex digest
:var int bandwidth: bandwidth measured to be available by the relay, this is a
unit-less heuristic generated by the bandwidth authorities to weight relay
selection
:var int measured: *bandwidth* vote provided by a bandwidth authority
:var bool is_unmeasured: *bandwidth* measurement isn't based on three or more
measurements
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
information that isn't yet recognized
:var stem.exit_policy.MicroExitPolicy exit_policy: router's exit policy
:var dict protocols: mapping of protocols to their supported versions
:var list microdescriptor_hashes: **\*** tuples of two values, the list of
consensus methods for generating a set of digests and the 'algorithm =>
digest' mappings
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
.. versionchanged:: 1.5.0
Added the identifier and identifier_type attributes.
.. versionchanged:: 1.6.0
Added the protocols attribute.
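For example, parsing an entry directly (an illustrative sketch with made-up
but validly formatted values)...
::
  content = '\n'.join((
    'r caerSidi p1aag7VwarGxqctS7/fS0y5FU+s oQZFLYe9e4A7bOkWKR7TaNxb0JE 2012-08-06 11:19:31 71.35.150.29 9001 0',
    's Fast Running Stable Valid',
  ))
  entry = RouterStatusEntryV3(content)
  print(entry.nickname)  # caerSidi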
"""
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
'digest': (None, _parse_r_line),
'or_addresses': ([], _parse_a_line),
'identifier_type': (None, _parse_id_line),
'identifier': (None, _parse_id_line),
'bandwidth': (None, _parse_w_line),
'measured': (None, _parse_w_line),
'is_unmeasured': (False, _parse_w_line),
'unrecognized_bandwidth_entries': ([], _parse_w_line),
'exit_policy': (None, _parse_p_line),
'protocols': ({}, _parse_pr_line),
'microdescriptor_hashes': ([], _parse_m_line),
})
PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
'a': _parse_a_line,
'w': _parse_w_line,
'p': _parse_p_line,
'pr': _parse_pr_line,
'id': _parse_id_line,
'm': _parse_m_line,
})
@classmethod
def content(cls, attr = None, exclude = (), sign = False):
if sign:
raise NotImplementedError('Signing of %s not implemented' % cls.__name__)
return _descriptor_content(attr, exclude, (
('r', '%s p1aag7VwarGxqctS7/fS0y5FU+s oQZFLYe9e4A7bOkWKR7TaNxb0JE %s %s 9001 0' % (_random_nickname(), _random_date(), _random_ipv4_address())),
('s', 'Fast Named Running Stable Valid'),
))
def _name(self, is_plural = False):
return 'Router status entries (v3)' if is_plural else 'Router status entry (v3)'
def _required_fields(self):
return ('r', 's')
def _single_fields(self):
return ('r', 's', 'v', 'w', 'p', 'pr')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryV3):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __ne__(self, other):
return not self == other
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)
class RouterStatusEntryMicroV3(RouterStatusEntry):
"""
Information about an individual router stored within a microdescriptor
flavored network status document.
:var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
:var int measured: bandwidth measured to be available by the relay
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
measurements
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
information that isn't yet recognized
:var dict protocols: mapping of protocols to their supported versions
:var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor
.. versionchanged:: 1.6.0
Added the protocols attribute.
.. versionchanged:: 1.7.0
Added the or_addresses attribute.
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
"""
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
'or_addresses': ([], _parse_a_line),
'bandwidth': (None, _parse_w_line),
'measured': (None, _parse_w_line),
'is_unmeasured': (False, _parse_w_line),
'unrecognized_bandwidth_entries': ([], _parse_w_line),
'protocols': ({}, _parse_pr_line),
'digest': (None, _parse_microdescriptor_m_line),
})
PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
'a': _parse_a_line,
'w': _parse_w_line,
'm': _parse_microdescriptor_m_line,
'pr': _parse_pr_line,
})
@classmethod
def content(cls, attr = None, exclude = (), sign = False):
if sign:
raise NotImplementedError('Signing of %s not implemented' % cls.__name__)
return _descriptor_content(attr, exclude, (
('r', '%s ARIJF2zbqirB9IwsW0mQznccWww %s %s 9001 9030' % (_random_nickname(), _random_date(), _random_ipv4_address())),
('m', 'aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70'),
('s', 'Fast Guard HSDir Named Running Stable V2Dir Valid'),
))
def _name(self, is_plural = False):
return 'Router status entries (micro v3)' if is_plural else 'Router status entry (micro v3)'
def _required_fields(self):
return ('r', 's', 'm')
def _single_fields(self):
return ('r', 's', 'v', 'w', 'm', 'pr')
def _compare(self, other, method):
if not isinstance(other, RouterStatusEntryMicroV3):
return False
return method(str(self).strip(), str(other).strip())
def __hash__(self):
return hash(str(self).strip())
def __eq__(self, other):
return self._compare(other, lambda s, o: s == o)
def __ne__(self, other):
return not self == other
def __lt__(self, other):
return self._compare(other, lambda s, o: s < o)
def __le__(self, other):
return self._compare(other, lambda s, o: s <= o)

File diff suppressed because it is too large

@ -0,0 +1,117 @@
# Copyright 2013-2018, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Parsing for `TorDNSEL <https://www.torproject.org/projects/tordnsel.html.en>`_
exit list files.
::
TorDNSEL - Exit list provided by TorDNSEL
"""
import stem.util.connection
import stem.util.str_tools
import stem.util.tor_tools
from stem.descriptor import (
Descriptor,
_read_until_keywords,
_descriptor_components,
)
def _parse_file(tordnsel_file, validate = False, **kwargs):
"""
Iterates over a tordnsel file.
:returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL`
instances in the file
:raises:
* **ValueError** if the contents is malformed and validate is **True**
* **IOError** if the file can't be read
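For example (an illustrative sketch, assuming a downloaded exit list saved
as 'exit-addresses')...
::
  with open('exit-addresses', 'rb') as exit_file:
    for desc in _parse_file(exit_file):
      print(desc.fingerprint)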
"""
# skip content prior to the first ExitNode
_read_until_keywords('ExitNode', tordnsel_file, skip = True)
while True:
contents = _read_until_keywords('ExitAddress', tordnsel_file)
contents += _read_until_keywords('ExitNode', tordnsel_file)
if contents:
yield TorDNSEL(bytes.join(b'', contents), validate, **kwargs)
else:
break # done parsing file
class TorDNSEL(Descriptor):
"""
TorDNSEL descriptor (`exitlist specification
<https://www.torproject.org/tordnsel/exitlist-spec.txt>`_)
:var str fingerprint: **\*** relay's fingerprint
:var datetime published: **\*** time in UTC when this descriptor was made
:var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status
:var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time
**\*** attribute is either required when we're parsed with validation or has
a default value, others are left as **None** if undefined
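A typical entry in an exit list resembles the following (illustrative
values)...
::
  ExitNode 379FB450010D17078B3766C2273303C358C3A442
  Published 2013-08-19 02:27:48
  LastStatus 2013-08-19 03:02:54
  ExitAddress 93.115.97.242 2013-08-19 03:02:54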
"""
def __init__(self, raw_contents, validate):
super(TorDNSEL, self).__init__(raw_contents)
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
entries = _descriptor_components(raw_contents, validate)
self.fingerprint = None
self.published = None
self.last_status = None
self.exit_addresses = []
self._parse(entries, validate)
def _parse(self, entries, validate):
for keyword, values in list(entries.items()):
value, block_type, block_content = values[0]
if validate and block_content:
raise ValueError('Unexpected block content: %s' % block_content)
if keyword == 'ExitNode':
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % value)
self.fingerprint = value
elif keyword == 'Published':
try:
self.published = stem.util.str_tools._parse_timestamp(value)
except ValueError:
if validate:
raise ValueError("Published time wasn't parsable: %s" % value)
elif keyword == 'LastStatus':
try:
self.last_status = stem.util.str_tools._parse_timestamp(value)
except ValueError:
if validate:
raise ValueError("LastStatus time wasn't parsable: %s" % value)
elif keyword == 'ExitAddress':
for value, block_type, block_content in values:
address, date = value.split(' ', 1)
if validate:
if not stem.util.connection.is_valid_ipv4_address(address):
raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address)
elif block_content:
raise ValueError('Unexpected block content: %s' % block_content)
try:
date = stem.util.str_tools._parse_timestamp(date)
self.exit_addresses.append((address, date))
except ValueError:
if validate:
raise ValueError("ExitAddress found time wasn't parsable: %s" % value)
elif validate:
raise ValueError('Unrecognized keyword: %s' % keyword)