add tor deps

parent 0ee7628a4f
commit 1f23120cc3

91 changed files with 25537 additions and 535 deletions

841  Shared/lib/python3.4/site-packages/stem/descriptor/__init__.py (new file)

@@ -0,0 +1,841 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Package for parsing and processing descriptor data.

**Module Overview:**

::

  parse_file - Parses the descriptors in a file.

  Descriptor - Common parent for all descriptor file types.
    |- get_path - location of the descriptor on disk if it came from a file
    |- get_archive_path - location of the descriptor within the archive it came from
    |- get_bytes - similar to str(), but provides our original bytes content
    |- get_unrecognized_lines - unparsed descriptor content
    +- __str__ - string that the descriptor was made from

.. data:: DocumentHandler (enum)

  Ways in which we can parse a
  :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`.

  Both **ENTRIES** and **BARE_DOCUMENT** have a 'thin' document, which doesn't
  have a populated **routers** attribute. This allows for lower memory usage
  and upfront runtime. However, if read time and memory aren't a concern then
  **DOCUMENT** can provide you with a fully populated document.

  =================== ===========
  DocumentHandler     Description
  =================== ===========
  **ENTRIES**         Iterates over the contained :class:`~stem.descriptor.router_status_entry.RouterStatusEntry`. Each has a reference to the bare document it came from (through its **document** attribute).
  **DOCUMENT**        :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` with the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` it contains (through its **routers** attribute).
  **BARE_DOCUMENT**   :class:`~stem.descriptor.networkstatus.NetworkStatusDocument` **without** a reference to its contents (the :class:`~stem.descriptor.router_status_entry.RouterStatusEntry` are unread).
  =================== ===========
"""
|
||||
|
||||
__all__ = [
|
||||
'export',
|
||||
'reader',
|
||||
'remote',
|
||||
'extrainfo_descriptor',
|
||||
'server_descriptor',
|
||||
'microdescriptor',
|
||||
'networkstatus',
|
||||
'router_status_entry',
|
||||
'tordnsel',
|
||||
'parse_file',
|
||||
'Descriptor',
|
||||
]
|
||||
|
||||
import base64
|
||||
import codecs
|
||||
import copy
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import tarfile
|
||||
|
||||
import stem.prereq
|
||||
import stem.util.enum
|
||||
import stem.util.str_tools
|
||||
import stem.util.system
|
||||
|
||||
from stem import str_type
|
||||
|
||||
try:
|
||||
# added in python 2.7
|
||||
from collections import OrderedDict
|
||||
except ImportError:
|
||||
from stem.util.ordereddict import OrderedDict
|
||||
|
||||
KEYWORD_CHAR = 'a-zA-Z0-9-'
|
||||
WHITESPACE = ' \t'
|
||||
KEYWORD_LINE = re.compile('^([%s]+)(?:[%s]+(.*))?$' % (KEYWORD_CHAR, WHITESPACE))
|
||||
SPECIFIC_KEYWORD_LINE = '^(%%s)(?:[%s]+(.*))?$' % WHITESPACE
|
||||
PGP_BLOCK_START = re.compile('^-----BEGIN ([%s%s]+)-----$' % (KEYWORD_CHAR, WHITESPACE))
|
||||
PGP_BLOCK_END = '-----END %s-----'
|
||||
|
||||
DocumentHandler = stem.util.enum.UppercaseEnum(
|
||||
'ENTRIES',
|
||||
'DOCUMENT',
|
||||
'BARE_DOCUMENT',
|
||||
)
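A rough usage sketch of how these handlers change what parse_file() yields; the cached-consensus path below is only an illustration, not something this module requires...

::

  from stem.descriptor import DocumentHandler, parse_file

  # ENTRIES (the default) iterates over the individual router status entries

  for router in parse_file('/home/user/.tor/cached-consensus', document_handler = DocumentHandler.ENTRIES):
    print(router.nickname)

  # DOCUMENT provides a single, fully populated NetworkStatusDocument

  consensus = next(parse_file('/home/user/.tor/cached-consensus', document_handler = DocumentHandler.DOCUMENT))
  print(len(consensus.routers))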
|
||||
|
||||
|
||||
def parse_file(descriptor_file, descriptor_type = None, validate = False, document_handler = DocumentHandler.ENTRIES, **kwargs):
|
||||
"""
|
||||
Simple function to read the descriptor contents from a file, providing an
|
||||
iterator for its :class:`~stem.descriptor.__init__.Descriptor` contents.
|
||||
|
||||
If you don't provide a **descriptor_type** argument then this automatically
|
||||
tries to determine the descriptor type based on the following...
|
||||
|
||||
* The @type annotation on the first line. These are generally only found in
|
||||
the `CollecTor archives <https://collector.torproject.org/formats.html#relay-descriptors>`_.
|
||||
|
||||
* The filename if it matches something from tor's data directory. For
|
||||
instance, tor's 'cached-descriptors' contains server descriptors.
|
||||
|
||||
This is a handy function for simple usage, but if you're reading multiple
|
||||
descriptor files you might want to consider the
|
||||
:class:`~stem.descriptor.reader.DescriptorReader`.
|
||||
|
||||
Descriptor types include the following, including further minor versions (ie.
|
||||
if we support 1.1 then we also support everything from 1.0 and most things
|
||||
from 1.2, but not 2.0)...
|
||||
|
||||
========================================= =====
|
||||
Descriptor Type Class
|
||||
========================================= =====
|
||||
server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.RelayDescriptor`
|
||||
extra-info 1.0 :class:`~stem.descriptor.extrainfo_descriptor.RelayExtraInfoDescriptor`
|
||||
microdescriptor 1.0 :class:`~stem.descriptor.microdescriptor.Microdescriptor`
|
||||
directory 1.0 **unsupported**
|
||||
network-status-2 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV2` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV2`)
|
||||
dir-key-certificate-3 1.0 :class:`~stem.descriptor.networkstatus.KeyCertificate`
|
||||
network-status-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
|
||||
network-status-vote-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
|
||||
network-status-microdesc-consensus-3 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3` (with a :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`)
|
||||
bridge-network-status 1.0 :class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3` (with a :class:`~stem.descriptor.networkstatus.BridgeNetworkStatusDocument`)
|
||||
bridge-server-descriptor 1.0 :class:`~stem.descriptor.server_descriptor.BridgeDescriptor`
|
||||
bridge-extra-info 1.1 or 1.2 :class:`~stem.descriptor.extrainfo_descriptor.BridgeExtraInfoDescriptor`
|
||||
torperf 1.0 **unsupported**
|
||||
bridge-pool-assignment 1.0 **unsupported**
|
||||
tordnsel 1.0 :class:`~stem.descriptor.tordnsel.TorDNSEL`
|
||||
hidden-service-descriptor 1.0 :class:`~stem.descriptor.hidden_service_descriptor.HiddenServiceDescriptor`
|
||||
========================================= =====
|
||||
|
||||
If you're using **python 3** then beware that the open() function defaults to
|
||||
using text mode. **Binary mode** is strongly suggested because it's both
|
||||
faster (by my testing by about 33x) and doesn't do universal newline
|
||||
translation which can make us misparse the document.
|
||||
|
||||
::
|
||||
|
||||
my_descriptor_file = open(descriptor_path, 'rb')
|
||||
|
||||
:param str,file,tarfile descriptor_file: path or opened file with the descriptor contents
|
||||
:param str descriptor_type: `descriptor type <https://collector.torproject.org/formats.html>`_, this is guessed if not provided
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
|
||||
which to parse the :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for :class:`~stem.descriptor.__init__.Descriptor` instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is True
|
||||
* **TypeError** if we can't match the contents of the file to a descriptor type
|
||||
* **IOError** if unable to read from the descriptor_file
|
||||
"""
|
||||
|
||||
# Delegate to a helper if this is a path or tarfile.
|
||||
|
||||
handler = None
|
||||
|
||||
if isinstance(descriptor_file, (bytes, str_type)):
|
||||
if stem.util.system.is_tarfile(descriptor_file):
|
||||
handler = _parse_file_for_tar_path
|
||||
else:
|
||||
handler = _parse_file_for_path
|
||||
elif isinstance(descriptor_file, tarfile.TarFile):
|
||||
handler = _parse_file_for_tarfile
|
||||
|
||||
if handler:
|
||||
for desc in handler(descriptor_file, descriptor_type, validate, document_handler, **kwargs):
|
||||
yield desc
|
||||
|
||||
return
|
||||
|
||||
# The tor descriptor specifications do not provide a reliable method for
|
||||
# identifying a descriptor file's type and version so we need to guess
|
||||
# based on its filename. Metrics descriptors, however, can be identified
|
||||
# by an annotation on their first line...
|
||||
# https://trac.torproject.org/5651
|
||||
|
||||
initial_position = descriptor_file.tell()
|
||||
first_line = stem.util.str_tools._to_unicode(descriptor_file.readline().strip())
|
||||
metrics_header_match = re.match('^@type (\S+) (\d+)\.(\d+)$', first_line)
|
||||
|
||||
if not metrics_header_match:
|
||||
descriptor_file.seek(initial_position)
|
||||
|
||||
descriptor_path = getattr(descriptor_file, 'name', None)
|
||||
filename = '<undefined>' if descriptor_path is None else os.path.basename(descriptor_file.name)
|
||||
file_parser = None
|
||||
|
||||
if descriptor_type is not None:
|
||||
descriptor_type_match = re.match('^(\S+) (\d+)\.(\d+)$', descriptor_type)
|
||||
|
||||
if descriptor_type_match:
|
||||
desc_type, major_version, minor_version = descriptor_type_match.groups()
|
||||
file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
|
||||
else:
|
||||
raise ValueError("The descriptor_type must be of the form '<type> <major_version>.<minor_version>'")
|
||||
elif metrics_header_match:
|
||||
# Metrics descriptor handling
|
||||
|
||||
desc_type, major_version, minor_version = metrics_header_match.groups()
|
||||
file_parser = lambda f: _parse_metrics_file(desc_type, int(major_version), int(minor_version), f, validate, document_handler, **kwargs)
|
||||
else:
|
||||
# Cached descriptor handling. These contain multiple descriptors per file.
|
||||
|
||||
if filename == 'cached-descriptors' or filename == 'cached-descriptors.new':
|
||||
file_parser = lambda f: stem.descriptor.server_descriptor._parse_file(f, validate = validate, **kwargs)
|
||||
elif filename == 'cached-extrainfo' or filename == 'cached-extrainfo.new':
|
||||
file_parser = lambda f: stem.descriptor.extrainfo_descriptor._parse_file(f, validate = validate, **kwargs)
|
||||
elif filename == 'cached-microdescs' or filename == 'cached-microdescs.new':
|
||||
file_parser = lambda f: stem.descriptor.microdescriptor._parse_file(f, validate = validate, **kwargs)
|
||||
elif filename == 'cached-consensus':
|
||||
file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, validate = validate, document_handler = document_handler, **kwargs)
|
||||
elif filename == 'cached-microdesc-consensus':
|
||||
file_parser = lambda f: stem.descriptor.networkstatus._parse_file(f, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs)
|
||||
|
||||
if file_parser:
|
||||
for desc in file_parser(descriptor_file):
|
||||
if descriptor_path is not None:
|
||||
desc._set_path(os.path.abspath(descriptor_path))
|
||||
|
||||
yield desc
|
||||
|
||||
return
|
||||
|
||||
# Not recognized as a descriptor file.
|
||||
|
||||
raise TypeError("Unable to determine the descriptor's type. filename: '%s', first line: '%s'" % (filename, first_line))
|
||||
|
||||
|
||||
def _parse_file_for_path(descriptor_file, *args, **kwargs):
|
||||
with open(descriptor_file, 'rb') as desc_file:
|
||||
for desc in parse_file(desc_file, *args, **kwargs):
|
||||
yield desc
|
||||
|
||||
|
||||
def _parse_file_for_tar_path(descriptor_file, *args, **kwargs):
|
||||
# TODO: use 'with' for tarfile after dropping python 2.6 support
|
||||
tar_file = tarfile.open(descriptor_file)
|
||||
|
||||
try:
|
||||
for desc in parse_file(tar_file, *args, **kwargs):
|
||||
desc._set_path(os.path.abspath(descriptor_file))
|
||||
yield desc
|
||||
finally:
|
||||
if tar_file:
|
||||
tar_file.close()
|
||||
|
||||
|
||||
def _parse_file_for_tarfile(descriptor_file, *args, **kwargs):
|
||||
for tar_entry in descriptor_file:
|
||||
if tar_entry.isfile():
|
||||
entry = descriptor_file.extractfile(tar_entry)
|
||||
|
||||
try:
|
||||
for desc in parse_file(entry, *args, **kwargs):
|
||||
desc._set_archive_path(entry.name)
|
||||
yield desc
|
||||
finally:
|
||||
entry.close()
|
||||
|
||||
|
||||
def _parse_metrics_file(descriptor_type, major_version, minor_version, descriptor_file, validate, document_handler, **kwargs):
|
||||
# Parses descriptor files from metrics, yielding individual descriptors. This
|
||||
# throws a TypeError if the descriptor_type or version isn't recognized.
|
||||
|
||||
if descriptor_type == 'server-descriptor' and major_version == 1:
|
||||
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'bridge-server-descriptor' and major_version == 1:
|
||||
for desc in stem.descriptor.server_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'extra-info' and major_version == 1:
|
||||
for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = False, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'microdescriptor' and major_version == 1:
|
||||
for desc in stem.descriptor.microdescriptor._parse_file(descriptor_file, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'bridge-extra-info' and major_version == 1:
|
||||
# version 1.1 introduced a 'transport' field...
|
||||
# https://trac.torproject.org/6257
|
||||
|
||||
for desc in stem.descriptor.extrainfo_descriptor._parse_file(descriptor_file, is_bridge = True, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'network-status-2' and major_version == 1:
|
||||
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV2
|
||||
|
||||
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'dir-key-certificate-3' and major_version == 1:
|
||||
for desc in stem.descriptor.networkstatus._parse_file_key_certs(descriptor_file, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type in ('network-status-consensus-3', 'network-status-vote-3') and major_version == 1:
|
||||
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
|
||||
|
||||
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'network-status-microdesc-consensus-3' and major_version == 1:
|
||||
document_type = stem.descriptor.networkstatus.NetworkStatusDocumentV3
|
||||
|
||||
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, is_microdescriptor = True, validate = validate, document_handler = document_handler, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'bridge-network-status' and major_version == 1:
|
||||
document_type = stem.descriptor.networkstatus.BridgeNetworkStatusDocument
|
||||
|
||||
for desc in stem.descriptor.networkstatus._parse_file(descriptor_file, document_type, validate = validate, document_handler = document_handler, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'tordnsel' and major_version == 1:
|
||||
document_type = stem.descriptor.tordnsel.TorDNSEL
|
||||
|
||||
for desc in stem.descriptor.tordnsel._parse_file(descriptor_file, validate = validate, **kwargs):
|
||||
yield desc
|
||||
elif descriptor_type == 'hidden-service-descriptor' and major_version == 1:
|
||||
document_type = stem.descriptor.hidden_service_descriptor.HiddenServiceDescriptor
|
||||
|
||||
for desc in stem.descriptor.hidden_service_descriptor._parse_file(descriptor_file, validate = validate, **kwargs):
|
||||
yield desc
|
||||
else:
|
||||
raise TypeError("Unrecognized metrics descriptor format. type: '%s', version: '%i.%i'" % (descriptor_type, major_version, minor_version))
|
||||
|
||||
|
||||
def _value(line, entries):
|
||||
return entries[line][0][0]
|
||||
|
||||
|
||||
def _values(line, entries):
|
||||
return [entry[0] for entry in entries[line]]
|
||||
|
||||
|
||||
def _parse_simple_line(keyword, attribute):
|
||||
def _parse(descriptor, entries):
|
||||
setattr(descriptor, attribute, _value(keyword, entries))
|
||||
|
||||
return _parse
|
||||
|
||||
|
||||
def _parse_bytes_line(keyword, attribute):
|
||||
def _parse(descriptor, entries):
|
||||
line_match = re.search(stem.util.str_tools._to_bytes('^(opt )?%s(?:[%s]+(.*))?$' % (keyword, WHITESPACE)), descriptor.get_bytes(), re.MULTILINE)
|
||||
result = None
|
||||
|
||||
if line_match:
|
||||
value = line_match.groups()[1]
|
||||
result = b'' if value is None else value
|
||||
|
||||
setattr(descriptor, attribute, result)
|
||||
|
||||
return _parse
|
||||
|
||||
|
||||
def _parse_timestamp_line(keyword, attribute):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS
|
||||
|
||||
def _parse(descriptor, entries):
|
||||
value = _value(keyword, entries)
|
||||
|
||||
try:
|
||||
setattr(descriptor, attribute, stem.util.str_tools._parse_timestamp(value))
|
||||
except ValueError:
|
||||
raise ValueError("Timestamp on %s line wasn't parsable: %s %s" % (keyword, keyword, value))
|
||||
|
||||
return _parse
|
||||
|
||||
|
||||
def _parse_forty_character_hex(keyword, attribute):
|
||||
# format of fingerprints, sha1 digests, etc
|
||||
|
||||
def _parse(descriptor, entries):
|
||||
value = _value(keyword, entries)
|
||||
|
||||
if not stem.util.tor_tools.is_hex_digits(value, 40):
|
||||
raise ValueError('%s line had an invalid value (should be 40 hex characters): %s %s' % (keyword, keyword, value))
|
||||
|
||||
setattr(descriptor, attribute, value)
|
||||
|
||||
return _parse
|
||||
|
||||
|
||||
def _parse_key_block(keyword, attribute, expected_block_type, value_attribute = None):
|
||||
def _parse(descriptor, entries):
|
||||
value, block_type, block_contents = entries[keyword][0]
|
||||
|
||||
if not block_contents or block_type != expected_block_type:
|
||||
raise ValueError("'%s' should be followed by a %s block, but was a %s" % (keyword, expected_block_type, block_type))
|
||||
|
||||
setattr(descriptor, attribute, block_contents)
|
||||
|
||||
if value_attribute:
|
||||
setattr(descriptor, value_attribute, value)
|
||||
|
||||
return _parse
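These small factories are how the concrete descriptor classes declare their line parsers. A minimal sketch of the pattern follows; ExampleDescriptor and the 'example' keyword are made up for illustration and are not part of stem...

::

  _parse_example_line = _parse_simple_line('example', 'example')
  _parse_published_line = _parse_timestamp_line('published', 'published')

  class ExampleDescriptor(Descriptor):
    ATTRIBUTES = {
      'example': (None, _parse_example_line),
      'published': (None, _parse_published_line),
    }

    PARSER_FOR_LINE = {
      'example': _parse_example_line,
      'published': _parse_published_line,
    }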
|
||||
|
||||
|
||||
class Descriptor(object):
|
||||
"""
|
||||
Common parent for all types of descriptors.
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {} # mapping of 'attribute' => (default_value, parsing_function)
|
||||
PARSER_FOR_LINE = {} # line keyword to its associated parsing function
|
||||
|
||||
def __init__(self, contents, lazy_load = False):
|
||||
self._path = None
|
||||
self._archive_path = None
|
||||
self._raw_contents = contents
|
||||
self._lazy_loading = lazy_load
|
||||
self._entries = {}
|
||||
self._unrecognized_lines = []
|
||||
|
||||
def get_path(self):
|
||||
"""
|
||||
Provides the absolute path that we loaded this descriptor from.
|
||||
|
||||
:returns: **str** with the absolute path of the descriptor source
|
||||
"""
|
||||
|
||||
return self._path
|
||||
|
||||
def get_archive_path(self):
|
||||
"""
|
||||
If this descriptor came from an archive then provides its path within the
|
||||
archive. This is only set if the descriptor came from a
|
||||
:class:`~stem.descriptor.reader.DescriptorReader`, and is **None** if this
|
||||
descriptor didn't come from an archive.
|
||||
|
||||
:returns: **str** with the descriptor's path within the archive
|
||||
"""
|
||||
|
||||
return self._archive_path
|
||||
|
||||
def get_bytes(self):
|
||||
"""
|
||||
Provides the ASCII **bytes** of the descriptor. This only differs from
|
||||
**str()** if you're running python 3.x, in which case **str()** provides a
|
||||
**unicode** string.
|
||||
|
||||
:returns: **bytes** for the descriptor's contents
|
||||
"""
|
||||
|
||||
return self._raw_contents
|
||||
|
||||
def get_unrecognized_lines(self):
|
||||
"""
|
||||
Provides a list of lines that were either ignored or had data that we did
|
||||
not know how to process. This is most common due to new descriptor fields
|
||||
that this library does not yet know how to process. Patches welcome!
|
||||
|
||||
:returns: **list** of lines of unrecognized content
|
||||
"""
|
||||
|
||||
if self._lazy_loading:
|
||||
# we need to go ahead and parse the whole document to figure this out
|
||||
self._parse(self._entries, False)
|
||||
self._lazy_loading = False
|
||||
|
||||
return list(self._unrecognized_lines)
|
||||
|
||||
def _parse(self, entries, validate, parser_for_line = None):
|
||||
"""
|
||||
Parses a series of 'keyword => (value, pgp block)' mappings and applies
|
||||
them as attributes.
|
||||
|
||||
:param dict entries: descriptor contents to be applied
|
||||
:param bool validate: checks the validity of descriptor content if True
|
||||
:param dict parsers: mapping of lines to the function for parsing it
|
||||
|
||||
:raises: **ValueError** if an error occurs in validation
|
||||
"""
|
||||
|
||||
if parser_for_line is None:
|
||||
parser_for_line = self.PARSER_FOR_LINE
|
||||
|
||||
# set defaults
|
||||
|
||||
for attr in self.ATTRIBUTES:
|
||||
if not hasattr(self, attr):
|
||||
setattr(self, attr, copy.copy(self.ATTRIBUTES[attr][0]))
|
||||
|
||||
for keyword, values in list(entries.items()):
|
||||
try:
|
||||
if keyword in parser_for_line:
|
||||
parser_for_line[keyword](self, entries)
|
||||
else:
|
||||
for value, block_type, block_contents in values:
|
||||
line = '%s %s' % (keyword, value)
|
||||
|
||||
if block_contents:
|
||||
line += '\n%s' % block_contents
|
||||
|
||||
self._unrecognized_lines.append(line)
|
||||
except ValueError as exc:
|
||||
if validate:
|
||||
raise exc
|
||||
|
||||
def _set_path(self, path):
|
||||
self._path = path
|
||||
|
||||
def _set_archive_path(self, path):
|
||||
self._archive_path = path
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
return str(type(self))
|
||||
|
||||
def _digest_for_signature(self, signing_key, signature):
|
||||
"""
|
||||
Provides the signed digest we should have given this key and signature.
|
||||
|
||||
:param str signing_key: key block used to make this signature
|
||||
:param str signature: signed digest for this descriptor content
|
||||
|
||||
:returns: the digest string encoded in uppercase hex
|
||||
|
||||
:raises: ValueError if unable to provide a validly signed digest
|
||||
"""
|
||||
|
||||
if not stem.prereq.is_crypto_available():
|
||||
raise ValueError('Generating the signed digest requires pycrypto')
|
||||
|
||||
from Crypto.Util import asn1
|
||||
from Crypto.Util.number import bytes_to_long, long_to_bytes
|
||||
|
||||
# get the ASN.1 sequence
|
||||
|
||||
seq = asn1.DerSequence()
|
||||
seq.decode(_bytes_for_block(signing_key))
|
||||
modulus, public_exponent = seq[0], seq[1]
|
||||
|
||||
sig_as_bytes = _bytes_for_block(signature)
|
||||
sig_as_long = bytes_to_long(sig_as_bytes) # convert signature to an int
|
||||
blocksize = 128 # block size will always be 128 for a 1024 bit key
|
||||
|
||||
# use the public exponent[e] & the modulus[n] to decrypt the int
|
||||
|
||||
decrypted_int = pow(sig_as_long, public_exponent, modulus)
|
||||
|
||||
# convert the int to a byte array
|
||||
|
||||
decrypted_bytes = long_to_bytes(decrypted_int, blocksize)
|
||||
|
||||
############################################################################
|
||||
# The decrypted bytes should have a structure exactly along these lines.
|
||||
# 1 byte - [null '\x00']
|
||||
# 1 byte - [block type identifier '\x01'] - Should always be 1
|
||||
# N bytes - [padding '\xFF' ]
|
||||
# 1 byte - [separator '\x00' ]
|
||||
# M bytes - [message]
|
||||
# Total - 128 bytes
|
||||
# More info here http://www.ietf.org/rfc/rfc2313.txt
|
||||
# esp the Notes in section 8.1
|
||||
############################################################################
|
||||
|
||||
try:
|
||||
if decrypted_bytes.index(b'\x00\x01') != 0:
|
||||
raise ValueError('Verification failed, identifier missing')
|
||||
except ValueError:
|
||||
raise ValueError('Verification failed, malformed data')
|
||||
|
||||
try:
|
||||
identifier_offset = 2
|
||||
|
||||
# find the separator
|
||||
seperator_index = decrypted_bytes.index(b'\x00', identifier_offset)
|
||||
except ValueError:
|
||||
raise ValueError('Verification failed, separator not found')
|
||||
|
||||
digest_hex = codecs.encode(decrypted_bytes[seperator_index + 1:], 'hex_codec')
|
||||
return stem.util.str_tools._to_unicode(digest_hex.upper())
|
||||
|
||||
def _digest_for_content(self, start, end):
|
||||
"""
|
||||
Provides the digest of our descriptor's content in a given range.
|
||||
|
||||
:param bytes start: start of the range to generate a digest for
|
||||
:param bytes end: end of the range to generate a digest for
|
||||
|
||||
:returns: the digest string encoded in uppercase hex
|
||||
|
||||
:raises: ValueError if the digest cannot be calculated
|
||||
"""
|
||||
|
||||
raw_descriptor = self.get_bytes()
|
||||
|
||||
start_index = raw_descriptor.find(start)
|
||||
end_index = raw_descriptor.find(end, start_index)
|
||||
|
||||
if start_index == -1:
|
||||
raise ValueError("Digest is for the range starting with '%s' but that isn't in our descriptor" % start)
|
||||
elif end_index == -1:
|
||||
raise ValueError("Digest is for the range ending with '%s' but that isn't in our descriptor" % end)
|
||||
|
||||
digest_content = raw_descriptor[start_index:end_index + len(end)]
|
||||
digest_hash = hashlib.sha1(stem.util.str_tools._to_bytes(digest_content))
|
||||
return stem.util.str_tools._to_unicode(digest_hash.hexdigest().upper())
|
||||
|
||||
def __getattr__(self, name):
|
||||
# If attribute isn't already present we might be lazy loading it...
|
||||
|
||||
if self._lazy_loading and name in self.ATTRIBUTES:
|
||||
default, parsing_function = self.ATTRIBUTES[name]
|
||||
|
||||
try:
|
||||
parsing_function(self, self._entries)
|
||||
except (ValueError, KeyError):
|
||||
try:
|
||||
# despite having a validation failure check to see if we set something
|
||||
return super(Descriptor, self).__getattribute__(name)
|
||||
except AttributeError:
|
||||
setattr(self, name, copy.copy(default))
|
||||
|
||||
return super(Descriptor, self).__getattribute__(name)
|
||||
|
||||
def __str__(self):
|
||||
if stem.prereq.is_python_3():
|
||||
return stem.util.str_tools._to_unicode(self._raw_contents)
|
||||
else:
|
||||
return self._raw_contents
|
||||
|
||||
|
||||
def _read_until_keywords(keywords, descriptor_file, inclusive = False, ignore_first = False, skip = False, end_position = None, include_ending_keyword = False):
|
||||
"""
|
||||
Reads from the descriptor file until we get to one of the given keywords or reach the
|
||||
end of the file.
|
||||
|
||||
:param str,list keywords: keyword(s) we want to read until
|
||||
:param file descriptor_file: file with the descriptor content
|
||||
:param bool inclusive: includes the line with the keyword if True
|
||||
:param bool ignore_first: doesn't check if the first line read has one of the
|
||||
given keywords
|
||||
:param bool skip: skips buffering content, returning None
|
||||
:param int end_position: end if we reach this point in the file
|
||||
:param bool include_ending_keyword: provides the keyword we broke on if **True**
|
||||
|
||||
:returns: **list** with the lines until we find one of the keywords, this is
|
||||
a two value tuple with the ending keyword if include_ending_keyword is
|
||||
**True**
|
||||
"""
|
||||
|
||||
if skip:
|
||||
content = None
|
||||
content_append = lambda x: None
|
||||
else:
|
||||
content = []
|
||||
content_append = content.append
|
||||
|
||||
ending_keyword = None
|
||||
|
||||
if isinstance(keywords, (bytes, str_type)):
|
||||
keywords = (keywords,)
|
||||
|
||||
if ignore_first:
|
||||
first_line = descriptor_file.readline()
|
||||
|
||||
if first_line:
|
||||
content_append(first_line)
|
||||
|
||||
keyword_match = re.compile(SPECIFIC_KEYWORD_LINE % '|'.join(keywords))
|
||||
|
||||
while True:
|
||||
last_position = descriptor_file.tell()
|
||||
|
||||
if end_position and last_position >= end_position:
|
||||
break
|
||||
|
||||
line = descriptor_file.readline()
|
||||
|
||||
if not line:
|
||||
break # EOF
|
||||
|
||||
line_match = keyword_match.match(stem.util.str_tools._to_unicode(line))
|
||||
|
||||
if line_match:
|
||||
ending_keyword = line_match.groups()[0]
|
||||
|
||||
if not inclusive:
|
||||
descriptor_file.seek(last_position)
|
||||
else:
|
||||
content_append(line)
|
||||
|
||||
break
|
||||
else:
|
||||
content_append(line)
|
||||
|
||||
if include_ending_keyword:
|
||||
return (content, ending_keyword)
|
||||
else:
|
||||
return content
|
||||
|
||||
|
||||
def _bytes_for_block(content):
|
||||
"""
|
||||
Provides the base64 decoded content of a pgp-style block.
|
||||
|
||||
:param str content: block to be decoded
|
||||
|
||||
:returns: decoded block content
|
||||
|
||||
:raises: **TypeError** if this isn't base64 encoded content
|
||||
"""
|
||||
|
||||
# strip the '-----BEGIN RSA PUBLIC KEY-----' header and footer
|
||||
|
||||
content = ''.join(content.split('\n')[1:-1])
|
||||
|
||||
return base64.b64decode(stem.util.str_tools._to_bytes(content))
|
||||
|
||||
|
||||
def _get_pseudo_pgp_block(remaining_contents):
|
||||
"""
|
||||
Checks if given contents begins with a pseudo-Open-PGP-style block and, if
|
||||
so, pops it off and provides it back to the caller.
|
||||
|
||||
:param list remaining_contents: lines to be checked for a public key block
|
||||
|
||||
:returns: **tuple** of the (block_type, content) or None if it doesn't exist
|
||||
|
||||
:raises: **ValueError** if the contents starts with a key block but it's
|
||||
malformed (for instance, if it lacks an ending line)
|
||||
"""
|
||||
|
||||
if not remaining_contents:
|
||||
return None # nothing left
|
||||
|
||||
block_match = PGP_BLOCK_START.match(remaining_contents[0])
|
||||
|
||||
if block_match:
|
||||
block_type = block_match.groups()[0]
|
||||
block_lines = []
|
||||
end_line = PGP_BLOCK_END % block_type
|
||||
|
||||
while True:
|
||||
if not remaining_contents:
|
||||
raise ValueError("Unterminated pgp style block (looking for '%s'):\n%s" % (end_line, '\n'.join(block_lines)))
|
||||
|
||||
line = remaining_contents.pop(0)
|
||||
block_lines.append(line)
|
||||
|
||||
if line == end_line:
|
||||
return (block_type, '\n'.join(block_lines))
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def _get_descriptor_components(raw_contents, validate, extra_keywords = ()):
|
||||
"""
|
||||
Initial breakup of the server descriptor contents to make parsing easier.
|
||||
|
||||
A descriptor contains a series of 'keyword lines' which are simply a keyword
|
||||
followed by an optional value. Lines can also be followed by a signature
|
||||
block.
|
||||
|
||||
To get a sub-listing with just certain keywords use extra_keywords. This can
|
||||
be useful if we care about their relative ordering with respect to each
|
||||
other. For instance, we care about the ordering of 'accept' and 'reject'
|
||||
entries because this influences the resulting exit policy, but for everything
|
||||
else in server descriptors the order does not matter.
|
||||
|
||||
:param str raw_contents: descriptor content provided by the relay
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
True, skips these checks otherwise
|
||||
:param list extra_keywords: entity keywords to put into a separate listing
|
||||
with ordering intact
|
||||
|
||||
:returns:
|
||||
**collections.OrderedDict** with the 'keyword => (value, pgp key) entries'
|
||||
mappings. If a extra_keywords was provided then this instead provides a two
|
||||
value tuple, the second being a list of those entries.
|
||||
"""
|
||||
|
||||
if isinstance(raw_contents, bytes):
|
||||
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
|
||||
|
||||
entries = OrderedDict()
|
||||
extra_entries = [] # entries with a keyword in extra_keywords
|
||||
remaining_lines = raw_contents.split('\n')
|
||||
|
||||
while remaining_lines:
|
||||
line = remaining_lines.pop(0)
|
||||
|
||||
# V2 network status documents explicitly can contain blank lines...
|
||||
#
|
||||
# "Implementations MAY insert blank lines for clarity between sections;
|
||||
# these blank lines are ignored."
|
||||
#
|
||||
# ... and server descriptors end with an extra newline. But other documents
|
||||
# don't say how blank lines should be handled so globally ignoring them.
|
||||
|
||||
if not line:
|
||||
continue
|
||||
|
||||
# Some lines have an 'opt ' for backward compatibility. They should be
|
||||
# ignored. This prefix is being removed in...
|
||||
# https://trac.torproject.org/projects/tor/ticket/5124
|
||||
|
||||
if line.startswith('opt '):
|
||||
line = line[4:]
|
||||
|
||||
line_match = KEYWORD_LINE.match(line)
|
||||
|
||||
if not line_match:
|
||||
if not validate:
|
||||
continue
|
||||
|
||||
raise ValueError('Line contains invalid characters: %s' % line)
|
||||
|
||||
keyword, value = line_match.groups()
|
||||
|
||||
if value is None:
|
||||
value = ''
|
||||
|
||||
try:
|
||||
block_attr = _get_pseudo_pgp_block(remaining_lines)
|
||||
|
||||
if block_attr:
|
||||
block_type, block_contents = block_attr
|
||||
else:
|
||||
block_type, block_contents = None, None
|
||||
except ValueError as exc:
|
||||
if not validate:
|
||||
continue
|
||||
|
||||
raise exc
|
||||
|
||||
if keyword in extra_keywords:
|
||||
extra_entries.append('%s %s' % (keyword, value))
|
||||
else:
|
||||
entries.setdefault(keyword, []).append((value, block_type, block_contents))
|
||||
|
||||
if extra_keywords:
|
||||
return entries, extra_entries
|
||||
else:
|
||||
return entries
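For example (a trimmed, made-up fragment), two keyword lines come back as an OrderedDict keyed by keyword, each value being a list of (value, block_type, block_contents) tuples...

::

  components = _get_descriptor_components(
    'router caerSidi 71.35.133.197 9001 0 0\npublished 2012-03-01 17:15:27',
    False,
  )

  # components['router']    => [('caerSidi 71.35.133.197 9001 0 0', None, None)]
  # components['published'] => [('2012-03-01 17:15:27', None, None)]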
|
||||
|
||||
# importing at the end to avoid circular dependencies on our Descriptor class
|
||||
|
||||
import stem.descriptor.server_descriptor
|
||||
import stem.descriptor.extrainfo_descriptor
|
||||
import stem.descriptor.networkstatus
|
||||
import stem.descriptor.microdescriptor
|
||||
import stem.descriptor.tordnsel
|
||||
import stem.descriptor.hidden_service_descriptor

110  Shared/lib/python3.4/site-packages/stem/descriptor/export.py (new file)

@@ -0,0 +1,110 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Toolkit for exporting descriptors to other formats.

**Module Overview:**

::

  export_csv - Exports descriptors to a CSV
  export_csv_file - Writes exported CSV output to a file
"""
|
||||
|
||||
import csv
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from io import StringIO
|
||||
|
||||
import stem.descriptor
|
||||
import stem.prereq
|
||||
|
||||
|
||||
class _ExportDialect(csv.excel):
|
||||
lineterminator = '\n'
|
||||
|
||||
|
||||
def export_csv(descriptors, included_fields = (), excluded_fields = (), header = True):
|
||||
"""
|
||||
Provides a newline separated CSV for one or more descriptors. If simply
|
||||
provided with descriptors then the CSV contains all of its attributes,
|
||||
labeled with a header row. Either 'included_fields' or 'excluded_fields' can
|
||||
be used for more granular control over its attributes and the order.
|
||||
|
||||
:param Descriptor,list descriptors: either a
|
||||
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
|
||||
:param list included_fields: attributes to include in the csv
|
||||
:param list excluded_fields: attributes to exclude from the csv
|
||||
:param bool header: if **True** then the first line will be a comma separated
|
||||
list of the attribute names (**only supported in python 2.7 and higher**)
|
||||
|
||||
:returns: **str** of the CSV for the descriptors, one per line
|
||||
:raises: **ValueError** if descriptors contain more than one descriptor type
|
||||
"""
|
||||
|
||||
output_buffer = StringIO()
|
||||
export_csv_file(output_buffer, descriptors, included_fields, excluded_fields, header)
|
||||
return output_buffer.getvalue()
|
||||
|
||||
|
||||
def export_csv_file(output_file, descriptors, included_fields = (), excluded_fields = (), header = True):
|
||||
"""
|
||||
Similar to :func:`stem.descriptor.export.export_csv`, except that the CSV is
|
||||
written directly to a file.
|
||||
|
||||
:param file output_file: file to be written to
|
||||
:param Descriptor,list descriptors: either a
|
||||
:class:`~stem.descriptor.Descriptor` or list of descriptors to be exported
|
||||
:param list included_fields: attributes to include in the csv
|
||||
:param list excluded_fields: attributes to exclude from the csv
|
||||
:param bool header: if **True** then the first line will be a comma separated
|
||||
list of the attribute names (**only supported in python 2.7 and higher**)
|
||||
|
||||
:returns: **str** of the CSV for the descriptors, one per line
|
||||
:raises: **ValueError** if descriptors contain more than one descriptor type
|
||||
"""
|
||||
|
||||
if isinstance(descriptors, stem.descriptor.Descriptor):
|
||||
descriptors = (descriptors,)
|
||||
|
||||
if not descriptors:
|
||||
return
|
||||
|
||||
descriptor_type = type(descriptors[0])
|
||||
descriptor_type_label = descriptor_type.__name__
|
||||
included_fields = list(included_fields)
|
||||
|
||||
# If the user didn't specify the fields to include then export everything,
|
||||
# ordered alphabetically. If they did specify fields then make sure that
|
||||
# they exist.
|
||||
|
||||
desc_attr = sorted(vars(descriptors[0]).keys())
|
||||
|
||||
if included_fields:
|
||||
for field in included_fields:
|
||||
if field not in desc_attr:
|
||||
raise ValueError("%s does not have a '%s' attribute, valid fields are: %s" % (descriptor_type_label, field, ', '.join(desc_attr)))
|
||||
else:
|
||||
included_fields = [attr for attr in desc_attr if not attr.startswith('_')]
|
||||
|
||||
for field in excluded_fields:
|
||||
try:
|
||||
included_fields.remove(field)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
writer = csv.DictWriter(output_file, included_fields, dialect = _ExportDialect(), extrasaction='ignore')
|
||||
|
||||
if header and stem.prereq.is_python_27():
|
||||
writer.writeheader()
|
||||
|
||||
for desc in descriptors:
|
||||
if not isinstance(desc, stem.descriptor.Descriptor):
|
||||
raise ValueError('Unable to export a descriptor CSV since %s is not a descriptor.' % type(desc).__name__)
|
||||
elif descriptor_type != type(desc):
|
||||
raise ValueError('To export a descriptor CSV all of the descriptors must be of the same type. First descriptor was a %s but we later got a %s.' % (descriptor_type_label, type(desc)))
|
||||
|
||||
writer.writerow(vars(desc))
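A usage sketch (the descriptor source is just a placeholder), pairing this with stem.descriptor.parse_file to dump a few server descriptor attributes...

::

  import stem.descriptor
  from stem.descriptor.export import export_csv

  descriptors = list(stem.descriptor.parse_file('/home/user/.tor/cached-descriptors'))
  print(export_csv(descriptors, included_fields = ('nickname', 'fingerprint', 'published')))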

@@ -0,0 +1,939 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor extra-info descriptors. These are published by relays whenever
|
||||
their server descriptor is published and have a similar format. However, unlike
|
||||
server descriptors these don't contain information that Tor clients require to
|
||||
function and as such aren't fetched by default.
|
||||
|
||||
Defined in section 2.2 of the `dir-spec
|
||||
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_,
|
||||
extra-info descriptors contain interesting but non-vital information such as
|
||||
usage statistics. Tor clients cannot request these documents for bridges.
|
||||
|
||||
Extra-info descriptors are available from a few sources...
|
||||
|
||||
* If you have 'DownloadExtraInfo 1' in your torrc...
|
||||
|
||||
* control port via 'GETINFO extra-info/digest/\*' queries
|
||||
* the 'cached-extrainfo' file in tor's data directory
|
||||
|
||||
* Archived descriptors provided by CollecTor
|
||||
(https://collector.torproject.org/).
|
||||
|
||||
* Directory authorities and mirrors via their DirPort.
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
ExtraInfoDescriptor - Tor extra-info descriptor.
|
||||
|- RelayExtraInfoDescriptor - Extra-info descriptor for a relay.
|
||||
|- BridgeExtraInfoDescriptor - Extra-info descriptor for a bridge.
|
||||
|
|
||||
+- digest - calculates the upper-case hex digest value for our content
|
||||
|
||||
.. data:: DirResponse (enum)
|
||||
|
||||
Enumeration for known statuses for ExtraInfoDescriptor's dir_*_responses.
|
||||
|
||||
=================== ===========
|
||||
DirResponse Description
|
||||
=================== ===========
|
||||
**OK** network status requests that were answered
|
||||
**NOT_ENOUGH_SIGS** network status wasn't signed by enough authorities
|
||||
**UNAVAILABLE** requested network status was unavailable
|
||||
**NOT_FOUND** requested network status was not found
|
||||
**NOT_MODIFIED** network status unmodified since If-Modified-Since time
|
||||
**BUSY** directory was busy
|
||||
=================== ===========
|
||||
|
||||
.. data:: DirStat (enum)
|
||||
|
||||
Enumeration for known stats for ExtraInfoDescriptor's dir_*_direct_dl and
|
||||
dir_*_tunneled_dl.
|
||||
|
||||
===================== ===========
|
||||
DirStat Description
|
||||
===================== ===========
|
||||
**COMPLETE** requests that completed successfully
|
||||
**TIMEOUT** requests that didn't complete within a ten minute timeout
|
||||
**RUNNING** requests still in process when measurement's taken
|
||||
**MIN** smallest rate at which a descriptor was downloaded in B/s
|
||||
**MAX** largest rate at which a descriptor was downloaded in B/s
|
||||
**D1-4** and **D6-9** rate of the slowest x/10 download rates in B/s
|
||||
**Q1** and **Q3** rate of the slowest and fastest quarter download rates in B/s
|
||||
**MD** median download rate in B/s
|
||||
===================== ===========
|
||||
"""
|
||||
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
import stem.util.connection
|
||||
import stem.util.enum
|
||||
import stem.util.str_tools
|
||||
|
||||
from stem.descriptor import (
|
||||
PGP_BLOCK_END,
|
||||
Descriptor,
|
||||
_read_until_keywords,
|
||||
_get_descriptor_components,
|
||||
_value,
|
||||
_values,
|
||||
_parse_timestamp_line,
|
||||
_parse_forty_character_hex,
|
||||
_parse_key_block,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
# known statuses for dirreq-v2-resp and dirreq-v3-resp...
|
||||
DirResponse = stem.util.enum.Enum(
|
||||
('OK', 'ok'),
|
||||
('NOT_ENOUGH_SIGS', 'not-enough-sigs'),
|
||||
('UNAVAILABLE', 'unavailable'),
|
||||
('NOT_FOUND', 'not-found'),
|
||||
('NOT_MODIFIED', 'not-modified'),
|
||||
('BUSY', 'busy'),
|
||||
)
|
||||
|
||||
# known stats for dirreq-v2/3-direct-dl and dirreq-v2/3-tunneled-dl...
|
||||
dir_stats = ['complete', 'timeout', 'running', 'min', 'max', 'q1', 'q3', 'md']
|
||||
dir_stats += ['d%i' % i for i in range(1, 5)]
|
||||
dir_stats += ['d%i' % i for i in range(6, 10)]
|
||||
DirStat = stem.util.enum.Enum(*[(stat.upper(), stat) for stat in dir_stats])
|
||||
|
||||
# relay descriptors must have exactly one of the following
|
||||
REQUIRED_FIELDS = (
|
||||
'extra-info',
|
||||
'published',
|
||||
'router-signature',
|
||||
)
|
||||
|
||||
# optional entries that can appear at most once
|
||||
SINGLE_FIELDS = (
|
||||
'read-history',
|
||||
'write-history',
|
||||
'geoip-db-digest',
|
||||
'geoip6-db-digest',
|
||||
'bridge-stats-end',
|
||||
'bridge-ips',
|
||||
'dirreq-stats-end',
|
||||
'dirreq-v2-ips',
|
||||
'dirreq-v3-ips',
|
||||
'dirreq-v2-reqs',
|
||||
'dirreq-v3-reqs',
|
||||
'dirreq-v2-share',
|
||||
'dirreq-v3-share',
|
||||
'dirreq-v2-resp',
|
||||
'dirreq-v3-resp',
|
||||
'dirreq-v2-direct-dl',
|
||||
'dirreq-v3-direct-dl',
|
||||
'dirreq-v2-tunneled-dl',
|
||||
'dirreq-v3-tunneled-dl',
|
||||
'dirreq-read-history',
|
||||
'dirreq-write-history',
|
||||
'entry-stats-end',
|
||||
'entry-ips',
|
||||
'cell-stats-end',
|
||||
'cell-processed-cells',
|
||||
'cell-queued-cells',
|
||||
'cell-time-in-queue',
|
||||
'cell-circuits-per-decile',
|
||||
'conn-bi-direct',
|
||||
'exit-stats-end',
|
||||
'exit-kibibytes-written',
|
||||
'exit-kibibytes-read',
|
||||
'exit-streams-opened',
|
||||
)
|
||||
|
||||
|
||||
_timestamp_re = re.compile('^(.*) \(([0-9]+) s\)( .*)?$')
|
||||
_locale_re = re.compile('^[a-zA-Z0-9\?]{2}$')
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
|
||||
"""
|
||||
Iterates over the extra-info descriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool is_bridge: parses the file as being a bridge descriptor
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for :class:`~stem.descriptor.extrainfo_descriptor.ExtraInfoDescriptor`
|
||||
instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is **True**
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
while True:
|
||||
if not is_bridge:
|
||||
extrainfo_content = _read_until_keywords('router-signature', descriptor_file)
|
||||
|
||||
# we've reached the 'router-signature', now include the pgp style block
|
||||
|
||||
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
|
||||
extrainfo_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
|
||||
else:
|
||||
extrainfo_content = _read_until_keywords('router-digest', descriptor_file, True)
|
||||
|
||||
if extrainfo_content:
|
||||
if extrainfo_content[0].startswith(b'@type'):
|
||||
extrainfo_content = extrainfo_content[1:]
|
||||
|
||||
if is_bridge:
|
||||
yield BridgeExtraInfoDescriptor(bytes.join(b'', extrainfo_content), validate, **kwargs)
|
||||
else:
|
||||
yield RelayExtraInfoDescriptor(bytes.join(b'', extrainfo_content), validate, **kwargs)
|
||||
else:
|
||||
break # done parsing file
|
||||
|
||||
|
||||
def _parse_timestamp_and_interval(keyword, content):
|
||||
"""
|
||||
Parses a 'YYYY-MM-DD HH:MM:SS (NSEC s) *' entry.
|
||||
|
||||
:param str keyword: line's keyword
|
||||
:param str content: line content to be parsed
|
||||
|
||||
:returns: **tuple** of the form (timestamp (**datetime**), interval
|
||||
(**int**), remaining content (**str**))
|
||||
|
||||
:raises: **ValueError** if the content is malformed
|
||||
"""
|
||||
|
||||
line = '%s %s' % (keyword, content)
|
||||
content_match = _timestamp_re.match(content)
|
||||
|
||||
if not content_match:
|
||||
raise ValueError('Malformed %s line: %s' % (keyword, line))
|
||||
|
||||
timestamp_str, interval, remainder = content_match.groups()
|
||||
|
||||
if remainder:
|
||||
remainder = remainder[1:] # remove leading space
|
||||
|
||||
if not interval.isdigit():
|
||||
raise ValueError("%s line's interval wasn't a number: %s" % (keyword, line))
|
||||
|
||||
try:
|
||||
timestamp = stem.util.str_tools._parse_timestamp(timestamp_str)
|
||||
return timestamp, int(interval), remainder
|
||||
except ValueError:
|
||||
raise ValueError("%s line's timestamp wasn't parsable: %s" % (keyword, line))
|
||||
|
||||
|
||||
def _parse_extra_info_line(descriptor, entries):
|
||||
# "extra-info" Nickname Fingerprint
|
||||
|
||||
value = _value('extra-info', entries)
|
||||
extra_info_comp = value.split()
|
||||
|
||||
if len(extra_info_comp) < 2:
|
||||
raise ValueError('Extra-info line must have two values: extra-info %s' % value)
|
||||
elif not stem.util.tor_tools.is_valid_nickname(extra_info_comp[0]):
|
||||
raise ValueError("Extra-info line entry isn't a valid nickname: %s" % extra_info_comp[0])
|
||||
elif not stem.util.tor_tools.is_valid_fingerprint(extra_info_comp[1]):
|
||||
raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % extra_info_comp[1])
|
||||
|
||||
descriptor.nickname = extra_info_comp[0]
|
||||
descriptor.fingerprint = extra_info_comp[1]
|
||||
|
||||
|
||||
def _parse_transport_line(descriptor, entries):
|
||||
# "transport" transportname address:port [arglist]
|
||||
# Everything after the transportname is scrubbed in published bridge
|
||||
# descriptors, so we'll never see it in practice.
|
||||
#
|
||||
# These entries really only make sense for bridges, but have been seen
|
||||
# on non-bridges in the wild when the relay operator configured it this
|
||||
# way.
|
||||
|
||||
transports = {}
|
||||
|
||||
for value in _values('transport', entries):
|
||||
name, address, port, args = None, None, None, None
|
||||
|
||||
if ' ' not in value:
|
||||
# scrubbed
|
||||
name = value
|
||||
else:
|
||||
# not scrubbed
|
||||
value_comp = value.split()
|
||||
|
||||
if len(value_comp) < 1:
|
||||
raise ValueError('Transport line is missing its transport name: transport %s' % value)
|
||||
elif len(value_comp) < 2:
|
||||
raise ValueError('Transport line is missing its address:port value: transport %s' % value)
|
||||
elif ':' not in value_comp[1]:
|
||||
raise ValueError("Transport line's address:port entry is missing a colon: transport %s" % value)
|
||||
|
||||
name = value_comp[0]
|
||||
address, port_str = value_comp[1].split(':', 1)
|
||||
|
||||
if not stem.util.connection.is_valid_ipv4_address(address) or \
|
||||
stem.util.connection.is_valid_ipv6_address(address):
|
||||
raise ValueError('Transport line has a malformed address: transport %s' % value)
|
||||
elif not stem.util.connection.is_valid_port(port_str):
|
||||
raise ValueError('Transport line has a malformed port: transport %s' % value)
|
||||
|
||||
port = int(port_str)
|
||||
args = value_comp[2:] if len(value_comp) >= 3 else []
|
||||
|
||||
transports[name] = (address, port, args)
|
||||
|
||||
descriptor.transport = transports
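As a sketch of what this produces (the stub object, entries mapping, and addresses are made up), both the unscrubbed and scrubbed forms end up in the transport mapping...

::

  class _Stub(object):
    pass

  desc = _Stub()

  _parse_transport_line(desc, {'transport': [('obfs3 198.51.100.5:443', None, None)]})
  # desc.transport => {'obfs3': ('198.51.100.5', 443, [])}

  _parse_transport_line(desc, {'transport': [('obfs3', None, None)]})
  # desc.transport => {'obfs3': (None, None, None)}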
|
||||
|
||||
|
||||
def _parse_cell_circuits_per_decline_line(descriptor, entries):
|
||||
# "cell-circuits-per-decile" num
|
||||
|
||||
value = _value('cell-circuits-per-decile', entries)
|
||||
|
||||
if not value.isdigit():
|
||||
raise ValueError('Non-numeric cell-circuits-per-decile value: %s' % value)
|
||||
elif int(value) < 0:
|
||||
raise ValueError('Negative cell-circuits-per-decile value: %s' % value)
|
||||
|
||||
descriptor.cell_circuits_per_decile = int(value)
|
||||
|
||||
|
||||
def _parse_dirreq_line(keyword, recognized_counts_attr, unrecognized_counts_attr, descriptor, entries):
|
||||
value = _value(keyword, entries)
|
||||
|
||||
recognized_counts = {}
|
||||
unrecognized_counts = {}
|
||||
|
||||
is_response_stats = keyword in ('dirreq-v2-resp', 'dirreq-v3-resp')
|
||||
key_set = DirResponse if is_response_stats else DirStat
|
||||
|
||||
key_type = 'STATUS' if is_response_stats else 'STAT'
|
||||
error_msg = '%s lines should contain %s=COUNT mappings: %s %s' % (keyword, key_type, keyword, value)
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if '=' not in entry:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
status, count = entry.split('=', 1)
|
||||
|
||||
if count.isdigit():
|
||||
if status in key_set:
|
||||
recognized_counts[status] = int(count)
|
||||
else:
|
||||
unrecognized_counts[status] = int(count)
|
||||
else:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
setattr(descriptor, recognized_counts_attr, recognized_counts)
|
||||
setattr(descriptor, unrecognized_counts_attr, unrecognized_counts)
|
||||
|
||||
|
||||
def _parse_dirreq_share_line(keyword, attribute, descriptor, entries):
|
||||
value = _value(keyword, entries)
|
||||
|
||||
if not value.endswith('%'):
|
||||
raise ValueError('%s lines should be a percentage: %s %s' % (keyword, keyword, value))
|
||||
elif float(value[:-1]) < 0:
|
||||
raise ValueError('Negative percentage value: %s %s' % (keyword, value))
|
||||
|
||||
# bug means it might be above 100%: https://lists.torproject.org/pipermail/tor-dev/2012-June/003679.html
|
||||
|
||||
setattr(descriptor, attribute, float(value[:-1]) / 100)
|
||||
|
||||
|
||||
def _parse_cell_line(keyword, attribute, descriptor, entries):
|
||||
# "<keyword>" num,...,num
|
||||
|
||||
value = _value(keyword, entries)
|
||||
entries, exc = [], None
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
try:
|
||||
# Values should be positive but as discussed in ticket #5849
|
||||
# there was a bug around this. It was fixed in tor 0.2.2.1.
|
||||
|
||||
entries.append(float(entry))
|
||||
except ValueError:
|
||||
exc = ValueError('Non-numeric entry in %s listing: %s %s' % (keyword, keyword, value))
|
||||
|
||||
setattr(descriptor, attribute, entries)
|
||||
|
||||
if exc:
|
||||
raise exc
|
||||
|
||||
|
||||
def _parse_timestamp_and_interval_line(keyword, end_attribute, interval_attribute, descriptor, entries):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s)
|
||||
|
||||
timestamp, interval, _ = _parse_timestamp_and_interval(keyword, _value(keyword, entries))
|
||||
setattr(descriptor, end_attribute, timestamp)
|
||||
setattr(descriptor, interval_attribute, interval)
|
||||
|
||||
|
||||
def _parse_conn_bi_direct_line(descriptor, entries):
|
||||
# "conn-bi-direct" YYYY-MM-DD HH:MM:SS (NSEC s) BELOW,READ,WRITE,BOTH
|
||||
|
||||
value = _value('conn-bi-direct', entries)
|
||||
timestamp, interval, remainder = _parse_timestamp_and_interval('conn-bi-direct', value)
|
||||
stats = remainder.split(',')
|
||||
|
||||
if len(stats) != 4 or not (stats[0].isdigit() and stats[1].isdigit() and stats[2].isdigit() and stats[3].isdigit()):
|
||||
raise ValueError('conn-bi-direct line should end with four numeric values: conn-bi-direct %s' % value)
|
||||
|
||||
descriptor.conn_bi_direct_end = timestamp
|
||||
descriptor.conn_bi_direct_interval = interval
|
||||
descriptor.conn_bi_direct_below = int(stats[0])
|
||||
descriptor.conn_bi_direct_read = int(stats[1])
|
||||
descriptor.conn_bi_direct_write = int(stats[2])
|
||||
descriptor.conn_bi_direct_both = int(stats[3])
|
||||
|
||||
|
||||
def _parse_history_line(keyword, end_attribute, interval_attribute, values_attribute, descriptor, entries):
|
||||
# "<keyword>" YYYY-MM-DD HH:MM:SS (NSEC s) NUM,NUM,NUM,NUM,NUM...
|
||||
|
||||
value = _value(keyword, entries)
|
||||
timestamp, interval, remainder = _parse_timestamp_and_interval(keyword, value)
|
||||
history_values = []
|
||||
|
||||
if remainder:
|
||||
try:
|
||||
history_values = [int(entry) for entry in remainder.split(',')]
|
||||
except ValueError:
|
||||
raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
|
||||
|
||||
setattr(descriptor, end_attribute, timestamp)
|
||||
setattr(descriptor, interval_attribute, interval)
|
||||
setattr(descriptor, values_attribute, history_values)
|
||||
|
||||
|
||||
def _parse_port_count_line(keyword, attribute, descriptor, entries):
|
||||
# "<keyword>" port=N,port=N,...
|
||||
|
||||
value, port_mappings = _value(keyword, entries), {}
|
||||
error_msg = 'Entries in %s line should only be PORT=N entries: %s %s' % (keyword, keyword, value)
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if '=' not in entry:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
port, stat = entry.split('=', 1)
|
||||
|
||||
if (port == 'other' or stem.util.connection.is_valid_port(port)) and stat.isdigit():
|
||||
if port != 'other':
|
||||
port = int(port)
|
||||
|
||||
port_mappings[port] = int(stat)
|
||||
else:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
setattr(descriptor, attribute, port_mappings)
|
||||
|
||||
|
||||
def _parse_geoip_to_count_line(keyword, attribute, descriptor, entries):
|
||||
# "<keyword>" CC=N,CC=N,...
|
||||
#
|
||||
# The maxmind geoip (https://www.maxmind.com/app/iso3166) has numeric
|
||||
# locale codes for some special values, for instance...
|
||||
# A1,"Anonymous Proxy"
|
||||
# A2,"Satellite Provider"
|
||||
# ??,"Unknown"
|
||||
|
||||
value, locale_usage = _value(keyword, entries), {}
|
||||
error_msg = 'Entries in %s line should only be CC=N entries: %s %s' % (keyword, keyword, value)
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if '=' not in entry:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
locale, count = entry.split('=', 1)
|
||||
|
||||
if _locale_re.match(locale) and count.isdigit():
|
||||
locale_usage[locale] = int(count)
|
||||
else:
|
||||
raise ValueError(error_msg)
|
||||
|
||||
setattr(descriptor, attribute, locale_usage)
|
||||
|
||||
|
||||
def _parse_bridge_ip_versions_line(descriptor, entries):
|
||||
value, ip_versions = _value('bridge-ip-versions', entries), {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if '=' not in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-versions should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-versions %s" % value)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError('IP protocol count was non-numeric (%s): bridge-ip-versions %s' % (count, value))
|
||||
|
||||
ip_versions[protocol] = int(count)
|
||||
|
||||
descriptor.ip_versions = ip_versions
|
||||
|
||||
|
||||
def _parse_bridge_ip_transports_line(descriptor, entries):
|
||||
value, ip_transports = _value('bridge-ip-transports', entries), {}
|
||||
|
||||
if value:
|
||||
for entry in value.split(','):
|
||||
if '=' not in entry:
|
||||
raise stem.ProtocolError("The bridge-ip-transports should be a comma separated listing of '<protocol>=<count>' mappings: bridge-ip-transports %s" % value)
|
||||
|
||||
protocol, count = entry.split('=', 1)
|
||||
|
||||
if not count.isdigit():
|
||||
raise stem.ProtocolError('Transport count was non-numeric (%s): bridge-ip-transports %s' % (count, value))
|
||||
|
||||
ip_transports[protocol] = int(count)
|
||||
|
||||
descriptor.ip_transports = ip_transports
|
||||
|
||||
|
||||
def _parse_hs_stats(keyword, stat_attribute, extra_attribute, descriptor, entries):
|
||||
# "<keyword>" num key=val key=val...
|
||||
|
||||
value, stat, extra = _value(keyword, entries), None, {}
|
||||
|
||||
if value is not None:
|
||||
value_comp = value.split()
|
||||
|
||||
if not value_comp:
|
||||
raise ValueError("'%s' line was blank" % keyword)
|
||||
|
||||
try:
|
||||
stat = int(value_comp[0])
|
||||
except ValueError:
|
||||
raise ValueError("'%s' stat was non-numeric (%s): %s %s" % (keyword, value_comp[0], keyword, value))
|
||||
|
||||
for entry in value_comp[1:]:
|
||||
if '=' not in entry:
|
||||
raise ValueError('Entries after the stat in %s lines should only be key=val entries: %s %s' % (keyword, keyword, value))
|
||||
|
||||
key, val = entry.split('=', 1)
|
||||
extra[key] = val
|
||||
|
||||
setattr(descriptor, stat_attribute, stat)
|
||||
setattr(descriptor, extra_attribute, extra)
|
||||
|
||||
|
||||
_parse_geoip_db_digest_line = _parse_forty_character_hex('geoip-db-digest', 'geoip_db_digest')
|
||||
_parse_geoip6_db_digest_line = _parse_forty_character_hex('geoip6-db-digest', 'geoip6_db_digest')
|
||||
_parse_dirreq_v2_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-resp', 'dir_v2_responses', 'dir_v2_responses_unknown')
|
||||
_parse_dirreq_v3_resp_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-resp', 'dir_v3_responses', 'dir_v3_responses_unknown')
|
||||
_parse_dirreq_v2_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-direct-dl', 'dir_v2_direct_dl', 'dir_v2_direct_dl_unknown')
|
||||
_parse_dirreq_v3_direct_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-direct-dl', 'dir_v3_direct_dl', 'dir_v3_direct_dl_unknown')
|
||||
_parse_dirreq_v2_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v2-tunneled-dl', 'dir_v2_tunneled_dl', 'dir_v2_tunneled_dl_unknown')
|
||||
_parse_dirreq_v3_tunneled_dl_line = functools.partial(_parse_dirreq_line, 'dirreq-v3-tunneled-dl', 'dir_v3_tunneled_dl', 'dir_v3_tunneled_dl_unknown')
|
||||
_parse_dirreq_v2_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v2-share', 'dir_v2_share')
|
||||
_parse_dirreq_v3_share_line = functools.partial(_parse_dirreq_share_line, 'dirreq-v3-share', 'dir_v3_share')
|
||||
_parse_cell_processed_cells_line = functools.partial(_parse_cell_line, 'cell-processed-cells', 'cell_processed_cells')
|
||||
_parse_cell_queued_cells_line = functools.partial(_parse_cell_line, 'cell-queued-cells', 'cell_queued_cells')
|
||||
_parse_cell_time_in_queue_line = functools.partial(_parse_cell_line, 'cell-time-in-queue', 'cell_time_in_queue')
|
||||
_parse_published_line = _parse_timestamp_line('published', 'published')
|
||||
_parse_geoip_start_time_line = _parse_timestamp_line('geoip-start-time', 'geoip_start_time')
|
||||
_parse_cell_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'cell-stats-end', 'cell_stats_end', 'cell_stats_interval')
|
||||
_parse_entry_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'entry-stats-end', 'entry_stats_end', 'entry_stats_interval')
|
||||
_parse_exit_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'exit-stats-end', 'exit_stats_end', 'exit_stats_interval')
|
||||
_parse_bridge_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'bridge-stats-end', 'bridge_stats_end', 'bridge_stats_interval')
|
||||
_parse_dirreq_stats_end_line = functools.partial(_parse_timestamp_and_interval_line, 'dirreq-stats-end', 'dir_stats_end', 'dir_stats_interval')
|
||||
_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
|
||||
_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
|
||||
_parse_dirreq_read_history_line = functools.partial(_parse_history_line, 'dirreq-read-history', 'dir_read_history_end', 'dir_read_history_interval', 'dir_read_history_values')
|
||||
_parse_dirreq_write_history_line = functools.partial(_parse_history_line, 'dirreq-write-history', 'dir_write_history_end', 'dir_write_history_interval', 'dir_write_history_values')
|
||||
_parse_exit_kibibytes_written_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-written', 'exit_kibibytes_written')
|
||||
_parse_exit_kibibytes_read_line = functools.partial(_parse_port_count_line, 'exit-kibibytes-read', 'exit_kibibytes_read')
|
||||
_parse_exit_streams_opened_line = functools.partial(_parse_port_count_line, 'exit-streams-opened', 'exit_streams_opened')
|
||||
_parse_hidden_service_stats_end_line = _parse_timestamp_line('hidserv-stats-end', 'hs_stats_end')
|
||||
_parse_hidden_service_rend_relayed_cells_line = functools.partial(_parse_hs_stats, 'hidserv-rend-relayed-cells', 'hs_rend_cells', 'hs_rend_cells_attr')
|
||||
_parse_hidden_service_dir_onions_seen_line = functools.partial(_parse_hs_stats, 'hidserv-dir-onions-seen', 'hs_dir_onions_seen', 'hs_dir_onions_seen_attr')
|
||||
_parse_dirreq_v2_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-ips', 'dir_v2_ips')
|
||||
_parse_dirreq_v3_ips_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-ips', 'dir_v3_ips')
|
||||
_parse_dirreq_v2_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v2-reqs', 'dir_v2_requests')
|
||||
_parse_dirreq_v3_reqs_line = functools.partial(_parse_geoip_to_count_line, 'dirreq-v3-reqs', 'dir_v3_requests')
|
||||
_parse_geoip_client_origins_line = functools.partial(_parse_geoip_to_count_line, 'geoip-client-origins', 'geoip_client_origins')
|
||||
_parse_entry_ips_line = functools.partial(_parse_geoip_to_count_line, 'entry-ips', 'entry_ips')
|
||||
_parse_bridge_ips_line = functools.partial(_parse_geoip_to_count_line, 'bridge-ips', 'bridge_ips')
|
||||
_parse_router_digest_line = _parse_forty_character_hex('router-digest', '_digest')
|
||||
_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
|
||||
|
||||
|
||||
class ExtraInfoDescriptor(Descriptor):
|
||||
"""
|
||||
Extra-info descriptor document.
|
||||
|
||||
:var str nickname: **\*** relay's nickname
|
||||
:var str fingerprint: **\*** identity key fingerprint
|
||||
:var datetime published: **\*** time in UTC when this descriptor was made
|
||||
:var str geoip_db_digest: sha1 of the geoIP database file for IPv4 addresses
|
||||
:var str geoip6_db_digest: sha1 of the geoIP database file for IPv6 addresses
|
||||
:var dict transport: **\*** mapping of transport methods to their (address,
|
||||
port, args) tuple, these usually appear on bridges in which case all of
|
||||
those are **None**
|
||||
|
||||
**Bi-directional connection usage:**
|
||||
|
||||
:var datetime conn_bi_direct_end: end of the sampling interval
|
||||
:var int conn_bi_direct_interval: seconds per interval
|
||||
:var int conn_bi_direct_below: connections that read/wrote less than 20 KiB
|
||||
:var int conn_bi_direct_read: connections that read at least 10x more than wrote
|
||||
:var int conn_bi_direct_write: connections that wrote at least 10x more than read
|
||||
:var int conn_bi_direct_both: remaining connections
|
||||
|
||||
**Bytes read/written for relayed traffic:**
|
||||
|
||||
:var datetime read_history_end: end of the sampling interval
|
||||
:var int read_history_interval: seconds per interval
|
||||
:var list read_history_values: bytes read during each interval
|
||||
|
||||
:var datetime write_history_end: end of the sampling interval
|
||||
:var int write_history_interval: seconds per interval
|
||||
:var list write_history_values: bytes written during each interval
|
||||
|
||||
**Cell relaying statistics:**
|
||||
|
||||
:var datetime cell_stats_end: end of the period when stats were gathered
|
||||
:var int cell_stats_interval: length in seconds of the interval
|
||||
:var list cell_processed_cells: measurement of processed cells per circuit
|
||||
:var list cell_queued_cells: measurement of queued cells per circuit
|
||||
:var list cell_time_in_queue: mean enqueued time in milliseconds for cells
|
||||
:var int cell_circuits_per_decile: mean number of circuits in a decile
|
||||
|
||||
**Directory Mirror Attributes:**
|
||||
|
||||
:var datetime dir_stats_end: end of the period when stats were gathered
|
||||
:var int dir_stats_interval: length in seconds of the interval
|
||||
:var dict dir_v2_ips: mapping of locales to rounded count of requester ips
|
||||
:var dict dir_v3_ips: mapping of locales to rounded count of requester ips
|
||||
:var float dir_v2_share: percent of total directory traffic it expects to serve
|
||||
:var float dir_v3_share: percent of total directory traffic it expects to serve
|
||||
:var dict dir_v2_requests: mapping of locales to rounded count of requests
|
||||
:var dict dir_v3_requests: mapping of locales to rounded count of requests
|
||||
|
||||
:var dict dir_v2_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
|
||||
:var dict dir_v3_responses: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirResponse` to their rounded count
|
||||
:var dict dir_v2_responses_unknown: mapping of unrecognized statuses to their count
|
||||
:var dict dir_v3_responses_unknown: mapping of unrecognized statuses to their count
|
||||
|
||||
:var dict dir_v2_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
|
||||
:var dict dir_v3_direct_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over DirPort
|
||||
:var dict dir_v2_direct_dl_unknown: mapping of unrecognized stats to their measurement
|
||||
:var dict dir_v3_direct_dl_unknown: mapping of unrecognized stats to their measurement
|
||||
|
||||
:var dict dir_v2_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
|
||||
:var dict dir_v3_tunneled_dl: mapping of :data:`~stem.descriptor.extrainfo_descriptor.DirStat` to measurement over ORPort
|
||||
:var dict dir_v2_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
|
||||
:var dict dir_v3_tunneled_dl_unknown: mapping of unrecognized stats to their measurement
|
||||
|
||||
**Bytes read/written for directory mirroring:**
|
||||
|
||||
:var datetime dir_read_history_end: end of the sampling interval
|
||||
:var int dir_read_history_interval: seconds per interval
|
||||
:var list dir_read_history_values: bytes read during each interval
|
||||
|
||||
:var datetime dir_write_history_end: end of the sampling interval
|
||||
:var int dir_write_history_interval: seconds per interval
|
||||
:var list dir_write_history_values: bytes written during each interval
|
||||
|
||||
**Guard Attributes:**
|
||||
|
||||
:var datetime entry_stats_end: end of the period when stats were gathered
|
||||
:var int entry_stats_interval: length in seconds of the interval
|
||||
:var dict entry_ips: mapping of locales to rounded count of unique user ips
|
||||
|
||||
**Exit Attributes:**
|
||||
|
||||
:var datetime exit_stats_end: end of the period when stats were gathered
|
||||
:var int exit_stats_interval: length in seconds of the interval
|
||||
:var dict exit_kibibytes_written: traffic per port (keys are ints or 'other')
|
||||
:var dict exit_kibibytes_read: traffic per port (keys are ints or 'other')
|
||||
:var dict exit_streams_opened: streams per port (keys are ints or 'other')
|
||||
|
||||
**Hidden Service Attributes:**
|
||||
|
||||
:var datetime hs_stats_end: end of the sampling interval
|
||||
:var int hs_rend_cells: rounded count of the RENDEZVOUS1 cells seen
|
||||
:var dict hs_rend_cells_attr: **\*** attributes provided for the hs_rend_cells
|
||||
:var int hs_dir_onions_seen: rounded count of the identities seen
|
||||
:var dict hs_dir_onions_seen_attr: **\*** attributes provided for the hs_dir_onions_seen
|
||||
|
||||
**Bridge Attributes:**
|
||||
|
||||
:var datetime bridge_stats_end: end of the period when stats were gathered
|
||||
:var int bridge_stats_interval: length in seconds of the interval
|
||||
:var dict bridge_ips: mapping of locales to rounded count of unique user ips
|
||||
:var datetime geoip_start_time: replaced by bridge_stats_end (deprecated)
|
||||
:var dict geoip_client_origins: replaced by bridge_ips (deprecated)
|
||||
:var dict ip_versions: mapping of ip protocols to a rounded count for the number of users
|
||||
:var dict ip_transports: mapping of ip transports to a count for the number of users
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
|
||||
.. versionchanged:: 1.4.0
|
||||
Added the hs_stats_end, hs_rend_cells, hs_rend_cells_attr,
|
||||
hs_dir_onions_seen, and hs_dir_onions_seen_attr attributes.
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {
|
||||
'nickname': (None, _parse_extra_info_line),
|
||||
'fingerprint': (None, _parse_extra_info_line),
|
||||
'published': (None, _parse_published_line),
|
||||
'geoip_db_digest': (None, _parse_geoip_db_digest_line),
|
||||
'geoip6_db_digest': (None, _parse_geoip6_db_digest_line),
|
||||
'transport': ({}, _parse_transport_line),
|
||||
|
||||
'conn_bi_direct_end': (None, _parse_conn_bi_direct_line),
|
||||
'conn_bi_direct_interval': (None, _parse_conn_bi_direct_line),
|
||||
'conn_bi_direct_below': (None, _parse_conn_bi_direct_line),
|
||||
'conn_bi_direct_read': (None, _parse_conn_bi_direct_line),
|
||||
'conn_bi_direct_write': (None, _parse_conn_bi_direct_line),
|
||||
'conn_bi_direct_both': (None, _parse_conn_bi_direct_line),
|
||||
|
||||
'read_history_end': (None, _parse_read_history_line),
|
||||
'read_history_interval': (None, _parse_read_history_line),
|
||||
'read_history_values': (None, _parse_read_history_line),
|
||||
|
||||
'write_history_end': (None, _parse_write_history_line),
|
||||
'write_history_interval': (None, _parse_write_history_line),
|
||||
'write_history_values': (None, _parse_write_history_line),
|
||||
|
||||
'cell_stats_end': (None, _parse_cell_stats_end_line),
|
||||
'cell_stats_interval': (None, _parse_cell_stats_end_line),
|
||||
'cell_processed_cells': (None, _parse_cell_processed_cells_line),
|
||||
'cell_queued_cells': (None, _parse_cell_queued_cells_line),
|
||||
'cell_time_in_queue': (None, _parse_cell_time_in_queue_line),
|
||||
'cell_circuits_per_decile': (None, _parse_cell_circuits_per_decline_line),
|
||||
|
||||
'dir_stats_end': (None, _parse_dirreq_stats_end_line),
|
||||
'dir_stats_interval': (None, _parse_dirreq_stats_end_line),
|
||||
'dir_v2_ips': (None, _parse_dirreq_v2_ips_line),
|
||||
'dir_v3_ips': (None, _parse_dirreq_v3_ips_line),
|
||||
'dir_v2_share': (None, _parse_dirreq_v2_share_line),
|
||||
'dir_v3_share': (None, _parse_dirreq_v3_share_line),
|
||||
'dir_v2_requests': (None, _parse_dirreq_v2_reqs_line),
|
||||
'dir_v3_requests': (None, _parse_dirreq_v3_reqs_line),
|
||||
'dir_v2_responses': (None, _parse_dirreq_v2_resp_line),
|
||||
'dir_v3_responses': (None, _parse_dirreq_v3_resp_line),
|
||||
'dir_v2_responses_unknown': (None, _parse_dirreq_v2_resp_line),
|
||||
'dir_v3_responses_unknown': (None, _parse_dirreq_v3_resp_line),
|
||||
'dir_v2_direct_dl': (None, _parse_dirreq_v2_direct_dl_line),
|
||||
'dir_v3_direct_dl': (None, _parse_dirreq_v3_direct_dl_line),
|
||||
'dir_v2_direct_dl_unknown': (None, _parse_dirreq_v2_direct_dl_line),
|
||||
'dir_v3_direct_dl_unknown': (None, _parse_dirreq_v3_direct_dl_line),
|
||||
'dir_v2_tunneled_dl': (None, _parse_dirreq_v2_tunneled_dl_line),
|
||||
'dir_v3_tunneled_dl': (None, _parse_dirreq_v3_tunneled_dl_line),
|
||||
'dir_v2_tunneled_dl_unknown': (None, _parse_dirreq_v2_tunneled_dl_line),
|
||||
'dir_v3_tunneled_dl_unknown': (None, _parse_dirreq_v3_tunneled_dl_line),
|
||||
|
||||
'dir_read_history_end': (None, _parse_dirreq_read_history_line),
|
||||
'dir_read_history_interval': (None, _parse_dirreq_read_history_line),
|
||||
'dir_read_history_values': (None, _parse_dirreq_read_history_line),
|
||||
|
||||
'dir_write_history_end': (None, _parse_dirreq_write_history_line),
|
||||
'dir_write_history_interval': (None, _parse_dirreq_write_history_line),
|
||||
'dir_write_history_values': (None, _parse_dirreq_write_history_line),
|
||||
|
||||
'entry_stats_end': (None, _parse_entry_stats_end_line),
|
||||
'entry_stats_interval': (None, _parse_entry_stats_end_line),
|
||||
'entry_ips': (None, _parse_entry_ips_line),
|
||||
|
||||
'exit_stats_end': (None, _parse_exit_stats_end_line),
|
||||
'exit_stats_interval': (None, _parse_exit_stats_end_line),
|
||||
'exit_kibibytes_written': (None, _parse_exit_kibibytes_written_line),
|
||||
'exit_kibibytes_read': (None, _parse_exit_kibibytes_read_line),
|
||||
'exit_streams_opened': (None, _parse_exit_streams_opened_line),
|
||||
|
||||
'hs_stats_end': (None, _parse_hidden_service_stats_end_line),
|
||||
'hs_rend_cells': (None, _parse_hidden_service_rend_relayed_cells_line),
|
||||
'hs_rend_cells_attr': ({}, _parse_hidden_service_rend_relayed_cells_line),
|
||||
'hs_dir_onions_seen': (None, _parse_hidden_service_dir_onions_seen_line),
|
||||
'hs_dir_onions_seen_attr': ({}, _parse_hidden_service_dir_onions_seen_line),
|
||||
|
||||
'bridge_stats_end': (None, _parse_bridge_stats_end_line),
|
||||
'bridge_stats_interval': (None, _parse_bridge_stats_end_line),
|
||||
'bridge_ips': (None, _parse_bridge_ips_line),
|
||||
'geoip_start_time': (None, _parse_geoip_start_time_line),
|
||||
'geoip_client_origins': (None, _parse_geoip_client_origins_line),
|
||||
|
||||
'ip_versions': (None, _parse_bridge_ip_versions_line),
|
||||
'ip_transports': (None, _parse_bridge_ip_transports_line),
|
||||
}
|
||||
|
||||
PARSER_FOR_LINE = {
|
||||
'extra-info': _parse_extra_info_line,
|
||||
'geoip-db-digest': _parse_geoip_db_digest_line,
|
||||
'geoip6-db-digest': _parse_geoip6_db_digest_line,
|
||||
'transport': _parse_transport_line,
|
||||
'cell-circuits-per-decile': _parse_cell_circuits_per_decline_line,
|
||||
'dirreq-v2-resp': _parse_dirreq_v2_resp_line,
|
||||
'dirreq-v3-resp': _parse_dirreq_v3_resp_line,
|
||||
'dirreq-v2-direct-dl': _parse_dirreq_v2_direct_dl_line,
|
||||
'dirreq-v3-direct-dl': _parse_dirreq_v3_direct_dl_line,
|
||||
'dirreq-v2-tunneled-dl': _parse_dirreq_v2_tunneled_dl_line,
|
||||
'dirreq-v3-tunneled-dl': _parse_dirreq_v3_tunneled_dl_line,
|
||||
'dirreq-v2-share': _parse_dirreq_v2_share_line,
|
||||
'dirreq-v3-share': _parse_dirreq_v3_share_line,
|
||||
'cell-processed-cells': _parse_cell_processed_cells_line,
|
||||
'cell-queued-cells': _parse_cell_queued_cells_line,
|
||||
'cell-time-in-queue': _parse_cell_time_in_queue_line,
|
||||
'published': _parse_published_line,
|
||||
'geoip-start-time': _parse_geoip_start_time_line,
|
||||
'cell-stats-end': _parse_cell_stats_end_line,
|
||||
'entry-stats-end': _parse_entry_stats_end_line,
|
||||
'exit-stats-end': _parse_exit_stats_end_line,
|
||||
'bridge-stats-end': _parse_bridge_stats_end_line,
|
||||
'dirreq-stats-end': _parse_dirreq_stats_end_line,
|
||||
'conn-bi-direct': _parse_conn_bi_direct_line,
|
||||
'read-history': _parse_read_history_line,
|
||||
'write-history': _parse_write_history_line,
|
||||
'dirreq-read-history': _parse_dirreq_read_history_line,
|
||||
'dirreq-write-history': _parse_dirreq_write_history_line,
|
||||
'exit-kibibytes-written': _parse_exit_kibibytes_written_line,
|
||||
'exit-kibibytes-read': _parse_exit_kibibytes_read_line,
|
||||
'exit-streams-opened': _parse_exit_streams_opened_line,
|
||||
'hidserv-stats-end': _parse_hidden_service_stats_end_line,
|
||||
'hidserv-rend-relayed-cells': _parse_hidden_service_rend_relayed_cells_line,
|
||||
'hidserv-dir-onions-seen': _parse_hidden_service_dir_onions_seen_line,
|
||||
'dirreq-v2-ips': _parse_dirreq_v2_ips_line,
|
||||
'dirreq-v3-ips': _parse_dirreq_v3_ips_line,
|
||||
'dirreq-v2-reqs': _parse_dirreq_v2_reqs_line,
|
||||
'dirreq-v3-reqs': _parse_dirreq_v3_reqs_line,
|
||||
'geoip-client-origins': _parse_geoip_client_origins_line,
|
||||
'entry-ips': _parse_entry_ips_line,
|
||||
'bridge-ips': _parse_bridge_ips_line,
|
||||
'bridge-ip-versions': _parse_bridge_ip_versions_line,
|
||||
'bridge-ip-transports': _parse_bridge_ip_transports_line,
|
||||
}
|
||||
|
||||
def __init__(self, raw_contents, validate = False):
|
||||
"""
|
||||
Extra-info descriptor constructor. Validation of the descriptor's content
|
||||
is disabled by default; it can be enabled to check that the descriptor is
|
||||
well formed, at the cost of performance.
|
||||
|
||||
:param str raw_contents: extra-info content provided by the relay
|
||||
:param bool validate: checks the validity of the extra-info descriptor if
|
||||
**True**, skips these checks otherwise
|
||||
|
||||
:raises: **ValueError** if the contents is malformed and validate is True
|
||||
"""
|
||||
|
||||
super(ExtraInfoDescriptor, self).__init__(raw_contents, lazy_load = not validate)
|
||||
entries = _get_descriptor_components(raw_contents, validate)
|
||||
|
||||
if validate:
|
||||
for keyword in self._required_fields():
|
||||
if keyword not in entries:
|
||||
raise ValueError("Extra-info descriptor must have a '%s' entry" % keyword)
|
||||
|
||||
for keyword in self._required_fields() + SINGLE_FIELDS:
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("The '%s' entry can only appear once in an extra-info descriptor" % keyword)
|
||||
|
||||
expected_first_keyword = self._first_keyword()
|
||||
if expected_first_keyword and expected_first_keyword != list(entries.keys())[0]:
|
||||
raise ValueError("Extra-info descriptor must start with a '%s' entry" % expected_first_keyword)
|
||||
|
||||
expected_last_keyword = self._last_keyword()
|
||||
if expected_last_keyword and expected_last_keyword != list(entries.keys())[-1]:
|
||||
raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
|
||||
|
||||
self._parse(entries, validate)
|
||||
else:
|
||||
self._entries = entries
|
||||
|
||||
def digest(self):
|
||||
"""
|
||||
Provides the upper-case hex encoded sha1 of our content. This value is part
|
||||
of the server descriptor entry for this relay.
|
||||
|
||||
:returns: **str** with the upper-case hex digest value for this server
|
||||
descriptor
|
||||
"""
|
||||
|
||||
raise NotImplementedError('Unsupported Operation: this should be implemented by the ExtraInfoDescriptor subclass')
|
||||
|
||||
def _required_fields(self):
|
||||
return REQUIRED_FIELDS
|
||||
|
||||
def _first_keyword(self):
|
||||
return 'extra-info'
|
||||
|
||||
def _last_keyword(self):
|
||||
return 'router-signature'
|
||||
|
||||
|
||||
class RelayExtraInfoDescriptor(ExtraInfoDescriptor):
|
||||
"""
|
||||
Relay extra-info descriptor, constructed from data such as that provided by
|
||||
'GETINFO extra-info/digest/\*', cached descriptors, and metrics
|
||||
(`specification <https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_).
|
||||
|
||||
:var str signature: **\*** signature for this extrainfo descriptor
|
||||
|
||||
**\*** attribute is required when we're parsed with validation
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
|
||||
'signature': (None, _parse_router_signature_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
|
||||
'router-signature': _parse_router_signature_line,
|
||||
})
|
||||
|
||||
@lru_cache()
|
||||
def digest(self):
|
||||
# our digest is calculated from everything except our signature
|
||||
raw_content, ending = str(self), '\nrouter-signature\n'
|
||||
raw_content = raw_content[:raw_content.find(ending) + len(ending)]
|
||||
return hashlib.sha1(stem.util.str_tools._to_bytes(raw_content)).hexdigest().upper()
|
||||
|
||||
|
||||
class BridgeExtraInfoDescriptor(ExtraInfoDescriptor):
|
||||
"""
|
||||
Bridge extra-info descriptor (`bridge descriptor specification
|
||||
<https://collector.torproject.org/formats.html#bridge-descriptors>`_)
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(ExtraInfoDescriptor.ATTRIBUTES, **{
|
||||
'_digest': (None, _parse_router_digest_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(ExtraInfoDescriptor.PARSER_FOR_LINE, **{
|
||||
'router-digest': _parse_router_digest_line,
|
||||
})
|
||||
|
||||
def digest(self):
|
||||
return self._digest
|
||||
|
||||
def _required_fields(self):
|
||||
excluded_fields = [
|
||||
'router-signature',
|
||||
]
|
||||
|
||||
included_fields = [
|
||||
'router-digest',
|
||||
]
|
||||
|
||||
return tuple(included_fields + [f for f in REQUIRED_FIELDS if f not in excluded_fields])
|
||||
|
||||
def _last_keyword(self):
|
||||
return None
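
# The following is an illustrative sketch rather than part of stem itself: it
# parses a relay's cached extra-info descriptors with parse_file() and prints
# a few of the statistics documented above. The file path is an assumption,
# adjust it to your tor DataDirectory.

if __name__ == '__main__':
  import stem.descriptor

  for desc in stem.descriptor.parse_file('/home/atagar/.tor/cached-extrainfo'):
    print('%s published %s' % (desc.nickname, desc.published))

    if desc.read_history_values:
      print('  last read interval: %i bytes' % desc.read_history_values[-1])

    if desc.bridge_ips:
      print('  bridge users by locale: %s' % desc.bridge_ips)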
|
||||
|
|
422
Shared/lib/python3.4/site-packages/stem/descriptor/hidden_service_descriptor.py
Normal file
@@ -0,0 +1,422 @@
|
|||
# Copyright 2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor hidden service descriptors as described in Tor's `rend-spec
|
||||
<https://gitweb.torproject.org/torspec.git/tree/rend-spec.txt>`_.
|
||||
|
||||
Unlike other descriptor types these describe a hidden service rather than a
|
||||
relay. They're created by the service, and can only be fetched via relays with
|
||||
the HSDir flag.
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
HiddenServiceDescriptor - Tor hidden service descriptor.
|
||||
|
||||
.. versionadded:: 1.4.0
|
||||
"""
|
||||
|
||||
# TODO: Add a description for how to retrieve them when tor supports that
|
||||
# (#14847) and then update #15009.
|
||||
|
||||
import base64
|
||||
import binascii
|
||||
import collections
|
||||
import hashlib
|
||||
import io
|
||||
|
||||
import stem.util.connection
|
||||
import stem.util.str_tools
|
||||
|
||||
from stem.descriptor import (
|
||||
PGP_BLOCK_END,
|
||||
Descriptor,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
_bytes_for_block,
|
||||
_value,
|
||||
_parse_simple_line,
|
||||
_parse_timestamp_line,
|
||||
_parse_key_block,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
REQUIRED_FIELDS = (
|
||||
'rendezvous-service-descriptor',
|
||||
'version',
|
||||
'permanent-key',
|
||||
'secret-id-part',
|
||||
'publication-time',
|
||||
'protocol-versions',
|
||||
'signature',
|
||||
)
|
||||
|
||||
INTRODUCTION_POINTS_ATTR = {
|
||||
'identifier': None,
|
||||
'address': None,
|
||||
'port': None,
|
||||
'onion_key': None,
|
||||
'service_key': None,
|
||||
'intro_authentication': [],
|
||||
}
|
||||
|
||||
# introduction-point fields that can only appear once
|
||||
|
||||
SINGLE_INTRODUCTION_POINT_FIELDS = [
|
||||
'introduction-point',
|
||||
'ip-address',
|
||||
'onion-port',
|
||||
'onion-key',
|
||||
'service-key',
|
||||
]
|
||||
|
||||
BASIC_AUTH = 1
|
||||
STEALTH_AUTH = 2
|
||||
|
||||
IntroductionPoint = collections.namedtuple('IntroductionPoints', INTRODUCTION_POINTS_ATTR.keys())
|
||||
|
||||
|
||||
class DecryptionFailure(Exception):
|
||||
"""
|
||||
Failure to decrypt the hidden service descriptor's introduction-points.
|
||||
"""
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, validate = False, **kwargs):
|
||||
"""
|
||||
Iterates over the hidden service descriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for :class:`~stem.descriptor.hidden_service_descriptor.HiddenServiceDescriptor`
|
||||
instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is **True**
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
while True:
|
||||
descriptor_content = _read_until_keywords('signature', descriptor_file)
|
||||
|
||||
# we've reached the 'signature', now include the pgp style block
|
||||
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
|
||||
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
|
||||
|
||||
if descriptor_content:
|
||||
if descriptor_content[0].startswith(b'@type'):
|
||||
descriptor_content = descriptor_content[1:]
|
||||
|
||||
yield HiddenServiceDescriptor(bytes.join(b'', descriptor_content), validate, **kwargs)
|
||||
else:
|
||||
break # done parsing file
|
||||
|
||||
|
||||
def _parse_version_line(descriptor, entries):
|
||||
value = _value('version', entries)
|
||||
|
||||
if value.isdigit():
|
||||
descriptor.version = int(value)
|
||||
else:
|
||||
raise ValueError('version line must have a positive integer value: %s' % value)
|
||||
|
||||
|
||||
def _parse_protocol_versions_line(descriptor, entries):
|
||||
value = _value('protocol-versions', entries)
|
||||
|
||||
try:
|
||||
versions = [int(entry) for entry in value.split(',')]
|
||||
except ValueError:
|
||||
raise ValueError('protocol-versions line has non-numeric versions: protocol-versions %s' % value)
|
||||
|
||||
for v in versions:
|
||||
if v <= 0:
|
||||
raise ValueError('protocol-versions must be positive integers: %s' % value)
|
||||
|
||||
descriptor.protocol_versions = versions
|
||||
|
||||
|
||||
def _parse_introduction_points_line(descriptor, entries):
|
||||
_, block_type, block_contents = entries['introduction-points'][0]
|
||||
|
||||
if not block_contents or block_type != 'MESSAGE':
|
||||
raise ValueError("'introduction-points' should be followed by a MESSAGE block, but was a %s" % block_type)
|
||||
|
||||
descriptor.introduction_points_encoded = block_contents
|
||||
|
||||
try:
|
||||
decoded_field = _bytes_for_block(block_contents)
|
||||
except TypeError:
|
||||
raise ValueError("'introduction-points' isn't base64 encoded content:\n%s" % block_contents)
|
||||
|
||||
auth_types = []
|
||||
|
||||
while decoded_field.startswith(b'service-authentication ') and b'\n' in decoded_field:
|
||||
auth_line, decoded_field = decoded_field.split(b'\n', 1)
|
||||
auth_line_comp = auth_line.split(b' ')
|
||||
|
||||
if len(auth_line_comp) < 3:
|
||||
raise ValueError("Within introduction-points we expected 'service-authentication [auth_type] [auth_data]', but had '%s'" % auth_line)
|
||||
|
||||
auth_types.append((auth_line_comp[1], auth_line_comp[2]))
|
||||
|
||||
descriptor.introduction_points_auth = auth_types
|
||||
descriptor.introduction_points_content = decoded_field
|
||||
|
||||
_parse_rendezvous_service_descriptor_line = _parse_simple_line('rendezvous-service-descriptor', 'descriptor_id')
|
||||
_parse_permanent_key_line = _parse_key_block('permanent-key', 'permanent_key', 'RSA PUBLIC KEY')
|
||||
_parse_secret_id_part_line = _parse_simple_line('secret-id-part', 'secret_id_part')
|
||||
_parse_publication_time_line = _parse_timestamp_line('publication-time', 'published')
|
||||
_parse_signature_line = _parse_key_block('signature', 'signature', 'SIGNATURE')
|
||||
|
||||
|
||||
class HiddenServiceDescriptor(Descriptor):
|
||||
"""
|
||||
Hidden service descriptor.
|
||||
|
||||
:var str descriptor_id: **\*** identifier for this descriptor, this is a base32 hash of several fields
|
||||
:var int version: **\*** hidden service descriptor version
|
||||
:var str permanent_key: **\*** long term key of the hidden service
|
||||
:var str secret_id_part: **\*** hash of the time period, cookie, and replica
|
||||
values so our descriptor_id can be validated
|
||||
:var datetime published: **\*** time in UTC when this descriptor was made
|
||||
:var list protocol_versions: **\*** list of **int** versions that are supported when establishing a connection
|
||||
:var str introduction_points_encoded: raw introduction points blob
|
||||
:var list introduction_points_auth: **\*** tuples of the form
|
||||
(auth_method, auth_data) for our introduction_points_content
|
||||
:var bytes introduction_points_content: decoded introduction-points content
|
||||
without authentication data, if using cookie authentication this is
|
||||
encrypted
|
||||
:var str signature: signature of the descriptor content
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {
|
||||
'descriptor_id': (None, _parse_rendezvous_service_descriptor_line),
|
||||
'version': (None, _parse_version_line),
|
||||
'permanent_key': (None, _parse_permanent_key_line),
|
||||
'secret_id_part': (None, _parse_secret_id_part_line),
|
||||
'published': (None, _parse_publication_time_line),
|
||||
'protocol_versions': ([], _parse_protocol_versions_line),
|
||||
'introduction_points_encoded': (None, _parse_introduction_points_line),
|
||||
'introduction_points_auth': ([], _parse_introduction_points_line),
|
||||
'introduction_points_content': (None, _parse_introduction_points_line),
|
||||
'signature': (None, _parse_signature_line),
|
||||
}
|
||||
|
||||
PARSER_FOR_LINE = {
|
||||
'rendezvous-service-descriptor': _parse_rendezvous_service_descriptor_line,
|
||||
'version': _parse_version_line,
|
||||
'permanent-key': _parse_permanent_key_line,
|
||||
'secret-id-part': _parse_secret_id_part_line,
|
||||
'publication-time': _parse_publication_time_line,
|
||||
'protocol-versions': _parse_protocol_versions_line,
|
||||
'introduction-points': _parse_introduction_points_line,
|
||||
'signature': _parse_signature_line,
|
||||
}
|
||||
|
||||
def __init__(self, raw_contents, validate = False):
|
||||
super(HiddenServiceDescriptor, self).__init__(raw_contents, lazy_load = not validate)
|
||||
entries = _get_descriptor_components(raw_contents, validate)
|
||||
|
||||
if validate:
|
||||
for keyword in REQUIRED_FIELDS:
|
||||
if keyword not in entries:
|
||||
raise ValueError("Hidden service descriptor must have a '%s' entry" % keyword)
|
||||
elif keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("The '%s' entry can only appear once in a hidden service descriptor" % keyword)
|
||||
|
||||
if 'rendezvous-service-descriptor' != list(entries.keys())[0]:
|
||||
raise ValueError("Hidden service descriptor must start with a 'rendezvous-service-descriptor' entry")
|
||||
elif 'signature' != list(entries.keys())[-1]:
|
||||
raise ValueError("Hidden service descriptor must end with a 'signature' entry")
|
||||
|
||||
self._parse(entries, validate)
|
||||
|
||||
if stem.prereq.is_crypto_available():
|
||||
signed_digest = self._digest_for_signature(self.permanent_key, self.signature)
|
||||
content_digest = self._digest_for_content(b'rendezvous-service-descriptor ', b'\nsignature\n')
|
||||
|
||||
if signed_digest != content_digest:
|
||||
raise ValueError('Decrypted digest does not match local digest (calculated: %s, local: %s)' % (signed_digest, content_digest))
|
||||
else:
|
||||
self._entries = entries
|
||||
|
||||
@lru_cache()
|
||||
def introduction_points(self, authentication_cookie = None):
|
||||
"""
|
||||
Provides this service's introduction points. This is a list of
|
||||
IntroductionPoint instances, which have the following attributes...
|
||||
|
||||
* **identifier** (str): hash of this introduction point's identity key
|
||||
* **address** (str): address of this introduction point
|
||||
* **port** (int): port where this introduction point is listening
|
||||
* **onion_key** (str): public key for communicating with this introduction point
|
||||
* **service_key** (str): public key for communicating with this hidden service
|
||||
* **intro_authentication** (list): tuples of the form (auth_type, auth_data)
|
||||
for establishing a connection
|
||||
|
||||
:param str authentication_cookie: cookie to decrypt the introduction-points
|
||||
if it's encrypted
|
||||
|
||||
:returns: **list** of IntroductionPoint instances
|
||||
|
||||
:raises:
|
||||
* **ValueError** if our introduction-points are malformed
|
||||
* **DecryptionFailure** if unable to decrypt this field
|
||||
"""
|
||||
|
||||
content = self.introduction_points_content
|
||||
|
||||
if not content:
|
||||
return []
|
||||
elif authentication_cookie:
|
||||
if not stem.prereq.is_crypto_available():
|
||||
raise DecryptionFailure('Decrypting introduction-points requires pycrypto')
|
||||
|
||||
try:
|
||||
missing_padding = len(authentication_cookie) % 4
|
||||
authentication_cookie = base64.b64decode(stem.util.str_tools._to_bytes(authentication_cookie) + b'=' * missing_padding)
|
||||
except TypeError as exc:
|
||||
raise DecryptionFailure('authentication_cookie must be a base64 encoded string (%s)' % exc)
|
||||
|
||||
authentication_type = int(binascii.hexlify(content[0:1]), 16)
|
||||
|
||||
if authentication_type == BASIC_AUTH:
|
||||
content = HiddenServiceDescriptor._decrypt_basic_auth(content, authentication_cookie)
|
||||
elif authentication_type == STEALTH_AUTH:
|
||||
content = HiddenServiceDescriptor._decrypt_stealth_auth(content, authentication_cookie)
|
||||
else:
|
||||
raise DecryptionFailure("Unrecognized authentication type '%s', currently we only support basic auth (%s) and stealth auth (%s)" % (authentication_type, BASIC_AUTH, STEALTH_AUTH))
|
||||
|
||||
if not content.startswith(b'introduction-point '):
|
||||
raise DecryptionFailure('Unable to decrypt the introduction-points, maybe this is the wrong key?')
|
||||
elif not content.startswith(b'introduction-point '):
|
||||
raise DecryptionFailure('introduction-points content is encrypted, you need to provide its authentication_cookie')
|
||||
|
||||
return HiddenServiceDescriptor._parse_introduction_points(content)
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_basic_auth(content, authentication_cookie):
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util import Counter
|
||||
from Crypto.Util.number import bytes_to_long
|
||||
|
||||
try:
|
||||
client_blocks = int(binascii.hexlify(content[1:2]), 16)
|
||||
except ValueError:
|
||||
raise DecryptionFailure("When using basic auth the content should start with a number of blocks but wasn't a hex digit: %s" % binascii.hexlify(content[1:2]))
|
||||
|
||||
# parse the client id and encrypted session keys
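# (each block is sixteen 20 byte entries, where an entry is a 4 byte client
# id followed by a 16 byte encrypted session key, hence the 16 * 20 below)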
|
||||
|
||||
client_entries_length = client_blocks * 16 * 20
|
||||
client_entries = content[2:2 + client_entries_length]
|
||||
client_keys = [(client_entries[i:i + 4], client_entries[i + 4:i + 20]) for i in range(0, client_entries_length, 4 + 16)]
|
||||
|
||||
iv = content[2 + client_entries_length:2 + client_entries_length + 16]
|
||||
encrypted = content[2 + client_entries_length + 16:]
|
||||
|
||||
client_id = hashlib.sha1(authentication_cookie + iv).digest()[:4]
|
||||
|
||||
for entry_id, encrypted_session_key in client_keys:
|
||||
if entry_id != client_id:
|
||||
continue # not the session key for this client
|
||||
|
||||
# try decrypting the session key
|
||||
|
||||
counter = Counter.new(128, initial_value = 0)
|
||||
cipher = AES.new(authentication_cookie, AES.MODE_CTR, counter = counter)
|
||||
session_key = cipher.decrypt(encrypted_session_key)
|
||||
|
||||
# attempt to decrypt the intro points with the session key
|
||||
|
||||
counter = Counter.new(128, initial_value = bytes_to_long(iv))
|
||||
cipher = AES.new(session_key, AES.MODE_CTR, counter = counter)
|
||||
decrypted = cipher.decrypt(encrypted)
|
||||
|
||||
# check if the decryption looks correct
|
||||
|
||||
if decrypted.startswith(b'introduction-point '):
|
||||
return decrypted
|
||||
|
||||
return content # nope, unable to decrypt the content
|
||||
|
||||
@staticmethod
|
||||
def _decrypt_stealth_auth(content, authentication_cookie):
|
||||
from Crypto.Cipher import AES
|
||||
from Crypto.Util import Counter
|
||||
from Crypto.Util.number import bytes_to_long
|
||||
|
||||
# byte 1 = authentication type, 2-17 = input vector, 18 on = encrypted content
|
||||
|
||||
iv, encrypted = content[1:17], content[17:]
|
||||
counter = Counter.new(128, initial_value = bytes_to_long(iv))
|
||||
cipher = AES.new(authentication_cookie, AES.MODE_CTR, counter = counter)
|
||||
|
||||
return cipher.decrypt(encrypted)
|
||||
|
||||
@staticmethod
|
||||
def _parse_introduction_points(content):
|
||||
"""
|
||||
Provides the parsed list of IntroductionPoint for the unencrypted content.
|
||||
"""
|
||||
|
||||
introduction_points = []
|
||||
content_io = io.BytesIO(content)
|
||||
|
||||
while True:
|
||||
content = b''.join(_read_until_keywords('introduction-point', content_io, ignore_first = True))
|
||||
|
||||
if not content:
|
||||
break # reached the end
|
||||
|
||||
attr = dict(INTRODUCTION_POINTS_ATTR)
|
||||
entries = _get_descriptor_components(content, False)
|
||||
|
||||
for keyword, values in list(entries.items()):
|
||||
value, block_type, block_contents = values[0]
|
||||
|
||||
if keyword in SINGLE_INTRODUCTION_POINT_FIELDS and len(values) > 1:
|
||||
raise ValueError("'%s' can only appear once in an introduction-point block, but appeared %i times" % (keyword, len(values)))
|
||||
|
||||
if keyword == 'introduction-point':
|
||||
attr['identifier'] = value
|
||||
elif keyword == 'ip-address':
|
||||
if not stem.util.connection.is_valid_ipv4_address(value):
|
||||
raise ValueError("'%s' is an invalid IPv4 address" % value)
|
||||
|
||||
attr['address'] = value
|
||||
elif keyword == 'onion-port':
|
||||
if not stem.util.connection.is_valid_port(value):
|
||||
raise ValueError("'%s' is an invalid port" % value)
|
||||
|
||||
attr['port'] = int(value)
|
||||
elif keyword == 'onion-key':
|
||||
attr['onion_key'] = block_contents
|
||||
elif keyword == 'service-key':
|
||||
attr['service_key'] = block_contents
|
||||
elif keyword == 'intro-authentication':
|
||||
auth_entries = []
|
||||
|
||||
for auth_value, _, _ in values:
|
||||
if ' ' not in auth_value:
|
||||
raise ValueError("We expected 'intro-authentication [auth_type] [auth_data]', but had '%s'" % auth_value)
|
||||
|
||||
auth_type, auth_data = auth_value.split(' ')[:2]
|
||||
auth_entries.append((auth_type, auth_data))
|
||||
|
||||
introduction_points.append(IntroductionPoint(**attr))
|
||||
|
||||
return introduction_points
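
# Illustrative sketch, not part of stem: hidden service descriptors can't yet
# be fetched through the controller (see the TODO near the top of this
# module), so this assumes you already have one saved at a hypothetical path.

if __name__ == '__main__':
  with open('/tmp/hs_descriptor', 'rb') as desc_file:
    desc = HiddenServiceDescriptor(desc_file.read(), validate = True)

  print('descriptor id: %s' % desc.descriptor_id)
  print('published: %s' % desc.published)

  for intro_point in desc.introduction_points():
    print('  introduction point at %s:%i' % (intro_point.address, intro_point.port))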
|
||||
|
|
314
Shared/lib/python3.4/site-packages/stem/descriptor/microdescriptor.py
Normal file
@@ -0,0 +1,314 @@
|
|||
# Copyright 2013-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor microdescriptors, which contain a distilled version of a
|
||||
relay's server descriptor. As of Tor version 0.2.3.3-alpha Tor no longer
|
||||
downloads server descriptors by default, opting for microdescriptors instead.
|
||||
|
||||
Unlike most descriptor documents these aren't available on the metrics site
|
||||
(since they don't contain any information that the server descriptors don't).
|
||||
|
||||
The limited information in microdescriptors makes them rather clunky to use
|
||||
compared with server descriptors. For instance microdescriptors lack the
|
||||
relay's fingerprint, making it difficult to use them to look up the relay's
|
||||
other descriptors.
|
||||
|
||||
To do so you need to match the microdescriptor's digest against its
|
||||
corresponding router status entry. For added fun as of this writing the
|
||||
controller doesn't even surface those router status entries
|
||||
(:trac:`7953`).
|
||||
|
||||
For instance, here's an example that prints the nickname and fingerprints of
|
||||
the exit relays.
|
||||
|
||||
::
|
||||
|
||||
import os
|
||||
|
||||
from stem.control import Controller
|
||||
from stem.descriptor import parse_file
|
||||
|
||||
with Controller.from_port(port = 9051) as controller:
|
||||
controller.authenticate()
|
||||
|
||||
exit_digests = set()
|
||||
data_dir = controller.get_conf('DataDirectory')
|
||||
|
||||
for desc in controller.get_microdescriptors():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
exit_digests.add(desc.digest)
|
||||
|
||||
print('Exit Relays:')
|
||||
|
||||
for desc in parse_file(os.path.join(data_dir, 'cached-microdesc-consensus')):
|
||||
if desc.digest in exit_digests:
|
||||
print(' %s (%s)' % (desc.nickname, desc.fingerprint))
|
||||
|
||||
Doing the same is trivial with server descriptors...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor import parse_file
|
||||
|
||||
print('Exit Relays:')
|
||||
|
||||
for desc in parse_file('/home/atagar/.tor/cached-descriptors'):
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print(' %s (%s)' % (desc.nickname, desc.fingerprint))
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
Microdescriptor - Tor microdescriptor.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
|
||||
import stem.exit_policy
|
||||
|
||||
from stem.descriptor import (
|
||||
Descriptor,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
_value,
|
||||
_parse_simple_line,
|
||||
_parse_key_block,
|
||||
)
|
||||
|
||||
from stem.descriptor.router_status_entry import (
|
||||
_parse_a_line,
|
||||
_parse_p_line,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
REQUIRED_FIELDS = (
|
||||
'onion-key',
|
||||
)
|
||||
|
||||
SINGLE_FIELDS = (
|
||||
'onion-key',
|
||||
'ntor-onion-key',
|
||||
'family',
|
||||
'p',
|
||||
'p6',
|
||||
)
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, validate = False, **kwargs):
|
||||
"""
|
||||
Iterates over the microdescriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for Microdescriptor instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is True
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
while True:
|
||||
annotations = _read_until_keywords('onion-key', descriptor_file)
|
||||
|
||||
# read until we reach an annotation or onion-key line
|
||||
descriptor_lines = []
|
||||
|
||||
# read the onion-key line, done if we're at the end of the document
|
||||
|
||||
onion_key_line = descriptor_file.readline()
|
||||
|
||||
if onion_key_line:
|
||||
descriptor_lines.append(onion_key_line)
|
||||
else:
|
||||
break
|
||||
|
||||
while True:
|
||||
last_position = descriptor_file.tell()
|
||||
line = descriptor_file.readline()
|
||||
|
||||
if not line:
|
||||
break # EOF
|
||||
elif line.startswith(b'@') or line.startswith(b'onion-key'):
|
||||
descriptor_file.seek(last_position)
|
||||
break
|
||||
else:
|
||||
descriptor_lines.append(line)
|
||||
|
||||
if descriptor_lines:
|
||||
if descriptor_lines[0].startswith(b'@type'):
|
||||
descriptor_lines = descriptor_lines[1:]
|
||||
|
||||
# strip newlines from annotations
|
||||
annotations = list(map(bytes.strip, annotations))
|
||||
|
||||
descriptor_text = bytes.join(b'', descriptor_lines)
|
||||
|
||||
yield Microdescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
break # done parsing descriptors
|
||||
|
||||
|
||||
def _parse_id_line(descriptor, entries):
|
||||
value = _value('id', entries)
|
||||
value_comp = value.split()
|
||||
|
||||
if len(value_comp) >= 2:
|
||||
descriptor.identifier_type = value_comp[0]
|
||||
descriptor.identifier = value_comp[1]
|
||||
else:
|
||||
raise ValueError("'id' lines should contain both the key type and digest: id %s" % value)
|
||||
|
||||
|
||||
_parse_digest = lambda descriptor, entries: setattr(descriptor, 'digest', hashlib.sha256(descriptor.get_bytes()).hexdigest().upper())
|
||||
_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
|
||||
_parse_ntor_onion_key_line = _parse_simple_line('ntor-onion-key', 'ntor_onion_key')
|
||||
_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', _value('family', entries).split(' '))
|
||||
_parse_p6_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('p6', entries)))
|
||||
|
||||
|
||||
class Microdescriptor(Descriptor):
|
||||
"""
|
||||
Microdescriptor (`descriptor specification
|
||||
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_)
|
||||
|
||||
:var str digest: **\*** hex digest for this microdescriptor, this can be used
|
||||
to match against the corresponding digest attribute of a
|
||||
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryMicroV3`
|
||||
:var str onion_key: **\*** key used to encrypt EXTEND cells
|
||||
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
|
||||
:var list or_addresses: **\*** alternative for our address/or_port attributes, each
|
||||
entry is a tuple of the form (address (**str**), port (**int**), is_ipv6
|
||||
(**bool**))
|
||||
:var list family: **\*** nicknames or fingerprints of declared family
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy: **\*** relay's exit policy
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
|
||||
:var str identifier_type: identity digest key type
|
||||
:var str identifier: base64 encoded identity digest, this is only used for collision prevention (:trac:`11743`)
|
||||
|
||||
**\*** attribute is required when we're parsed with validation
|
||||
|
||||
.. versionchanged:: 1.1.0
|
||||
Added the identifier and identifier_type attributes.
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {
|
||||
'onion_key': (None, _parse_onion_key_line),
|
||||
'ntor_onion_key': (None, _parse_ntor_onion_key_line),
|
||||
'or_addresses': ([], _parse_a_line),
|
||||
'family': ([], _parse_family_line),
|
||||
'exit_policy': (stem.exit_policy.MicroExitPolicy('reject 1-65535'), _parse_p_line),
|
||||
'exit_policy_v6': (None, _parse_p6_line),
|
||||
'identifier_type': (None, _parse_id_line),
|
||||
'identifier': (None, _parse_id_line),
|
||||
'digest': (None, _parse_digest),
|
||||
}
|
||||
|
||||
PARSER_FOR_LINE = {
|
||||
'onion-key': _parse_onion_key_line,
|
||||
'ntor-onion-key': _parse_ntor_onion_key_line,
|
||||
'a': _parse_a_line,
|
||||
'family': _parse_family_line,
|
||||
'p': _parse_p_line,
|
||||
'p6': _parse_p6_line,
|
||||
'id': _parse_id_line,
|
||||
}
|
||||
|
||||
def __init__(self, raw_contents, validate = False, annotations = None):
|
||||
super(Microdescriptor, self).__init__(raw_contents, lazy_load = not validate)
|
||||
self._annotation_lines = annotations if annotations else []
|
||||
entries = _get_descriptor_components(raw_contents, validate)
|
||||
|
||||
if validate:
|
||||
self.digest = hashlib.sha256(self.get_bytes()).hexdigest().upper()
|
||||
self._parse(entries, validate)
|
||||
self._check_constraints(entries)
|
||||
else:
|
||||
self._entries = entries
|
||||
|
||||
@lru_cache()
|
||||
def get_annotations(self):
|
||||
"""
|
||||
Provides content that appeared prior to the descriptor. If this comes from
|
||||
the cached-microdescs then this commonly contains content like...
|
||||
|
||||
::
|
||||
|
||||
@last-listed 2013-02-24 00:18:30
|
||||
|
||||
:returns: **dict** with the key/value pairs in our annotations
|
||||
"""
|
||||
|
||||
annotation_dict = {}
|
||||
|
||||
for line in self._annotation_lines:
|
||||
if b' ' in line:
|
||||
key, value = line.split(b' ', 1)
|
||||
annotation_dict[key] = value
|
||||
else:
|
||||
annotation_dict[line] = None
|
||||
|
||||
return annotation_dict
|
||||
|
||||
def get_annotation_lines(self):
|
||||
"""
|
||||
Provides the lines of content that appeared prior to the descriptor. This
|
||||
is the same as the
|
||||
:func:`~stem.descriptor.microdescriptor.Microdescriptor.get_annotations`
|
||||
results, but with the unparsed lines and ordering retained.
|
||||
|
||||
:returns: **list** with the lines of annotation that came before this descriptor
|
||||
"""
|
||||
|
||||
return self._annotation_lines
|
||||
|
||||
def _check_constraints(self, entries):
|
||||
"""
|
||||
Does a basic check that the entries conform to this descriptor type's
|
||||
constraints.
|
||||
|
||||
:param dict entries: keyword => (value, pgp key) entries
|
||||
|
||||
:raises: **ValueError** if an issue arises in validation
|
||||
"""
|
||||
|
||||
for keyword in REQUIRED_FIELDS:
|
||||
if keyword not in entries:
|
||||
raise ValueError("Microdescriptor must have a '%s' entry" % keyword)
|
||||
|
||||
for keyword in SINGLE_FIELDS:
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("The '%s' entry can only appear once in a microdescriptor" % keyword)
|
||||
|
||||
if 'onion-key' != list(entries.keys())[0]:
|
||||
raise ValueError("Microdescriptor must start with a 'onion-key' entry")
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
return 'microdescriptors' if is_plural else 'microdescriptor'
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, Microdescriptor):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
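
# Illustrative sketch, not part of stem: list each microdescriptor's digest
# alongside its '@last-listed' annotation from the cached-microdescs file
# mentioned in get_annotations() above. The path is an assumption.

if __name__ == '__main__':
  from stem.descriptor import parse_file

  for desc in parse_file('/home/atagar/.tor/cached-microdescs'):
    annotations = desc.get_annotations()
    print('%s (last listed: %s)' % (desc.digest, annotations.get(b'@last-listed')))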
|
||||
1444
Shared/lib/python3.4/site-packages/stem/descriptor/networkstatus.py
Normal file
File diff suppressed because it is too large
574
Shared/lib/python3.4/site-packages/stem/descriptor/reader.py
Normal file
|
|
@@ -0,0 +1,574 @@
|
|||
# Copyright 2012-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Utilities for reading descriptors from local directories and archives. This is
|
||||
mostly done through the :class:`~stem.descriptor.reader.DescriptorReader`
|
||||
class, which is an iterator for the descriptor data in a series of
|
||||
destinations. For example...
|
||||
|
||||
::
|
||||
|
||||
my_descriptors = [
|
||||
'/tmp/server-descriptors-2012-03.tar.bz2',
|
||||
'/tmp/archived_descriptors/',
|
||||
]
|
||||
|
||||
# prints the contents of all the descriptor files
|
||||
with DescriptorReader(my_descriptors) as reader:
|
||||
for descriptor in reader:
|
||||
print(descriptor)
|
||||
|
||||
This ignores files that cannot be processed due to read errors or unparsable
|
||||
content. To be notified of skipped files you can register a listener with
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.register_skip_listener`.
|
||||
|
||||
The :class:`~stem.descriptor.reader.DescriptorReader` keeps track of the last
|
||||
modified timestamps for descriptor files that it has read so it can skip
|
||||
unchanged files if run again. This listing of processed files can also be
|
||||
persisted and applied to other
|
||||
:class:`~stem.descriptor.reader.DescriptorReader` instances. For example, the
|
||||
following prints descriptors as they're changed over the course of a minute,
|
||||
and picks up where it left off if run again...
|
||||
|
||||
::
|
||||
|
||||
reader = DescriptorReader(['/tmp/descriptor_data'])
|
||||
|
||||
try:
|
||||
processed_files = load_processed_files('/tmp/used_descriptors')
|
||||
reader.set_processed_files(processed_files)
|
||||
except: pass # could not load, maybe this is the first run
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
while (time.time() - start_time) < 60:
|
||||
# prints any descriptors that have changed since last checked
|
||||
with reader:
|
||||
for descriptor in reader:
|
||||
print descriptor
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
save_processed_files('/tmp/used_descriptors', reader.get_processed_files())
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
load_processed_files - Loads a listing of processed files
|
||||
save_processed_files - Saves a listing of processed files
|
||||
|
||||
DescriptorReader - Iterator for descriptor data on the local file system
|
||||
|- get_processed_files - provides the listing of files that we've processed
|
||||
|- set_processed_files - sets our tracking of the files we have processed
|
||||
|- register_read_listener - adds a listener for when files are read
|
||||
|- register_skip_listener - adds a listener that's notified of skipped files
|
||||
|- start - begins reading descriptor data
|
||||
|- stop - stops reading descriptor data
|
||||
|- __enter__ / __exit__ - manages the descriptor reader thread in the context
|
||||
+- __iter__ - iterates over descriptor data in unread files
|
||||
|
||||
FileSkipped - Base exception for a file that was skipped
|
||||
|- AlreadyRead - We've already read a file with this last modified timestamp
|
||||
|- ParsingFailure - Contents can't be parsed as descriptor data
|
||||
|- UnrecognizedType - File extension indicates non-descriptor data
|
||||
+- ReadFailed - Wraps an error that was raised while reading the file
|
||||
+- FileMissing - File does not exist
|
||||
"""
|
||||
|
||||
import mimetypes
|
||||
import os
|
||||
import tarfile
|
||||
import threading
|
||||
|
||||
try:
|
||||
import queue
|
||||
except ImportError:
|
||||
import Queue as queue
|
||||
|
||||
import stem.descriptor
|
||||
import stem.prereq
|
||||
import stem.util.system
|
||||
|
||||
from stem import str_type
|
||||
|
||||
# flag to indicate when the reader thread is out of descriptor files to read
|
||||
FINISHED = 'DONE'
|
||||
|
||||
|
||||
class FileSkipped(Exception):
|
||||
"Base error when we can't provide descriptor data from a file."
|
||||
|
||||
|
||||
class AlreadyRead(FileSkipped):
|
||||
"""
|
||||
Already read a file with this 'last modified' timestamp or later.
|
||||
|
||||
:param int last_modified: unix timestamp for when the file was last modified
|
||||
:param int last_modified_when_read: unix timestamp for the modification time
|
||||
when we last read this file
|
||||
"""
|
||||
|
||||
def __init__(self, last_modified, last_modified_when_read):
|
||||
super(AlreadyRead, self).__init__('File has already been read since it was last modified. modification time: %s, last read: %s' % (last_modified, last_modified_when_read))
|
||||
self.last_modified = last_modified
|
||||
self.last_modified_when_read = last_modified_when_read
|
||||
|
||||
|
||||
class ParsingFailure(FileSkipped):
|
||||
"""
|
||||
File contents could not be parsed as descriptor data.
|
||||
|
||||
:param ValueError exception: issue that arose when parsing
|
||||
"""
|
||||
|
||||
def __init__(self, parsing_exception):
|
||||
super(ParsingFailure, self).__init__(parsing_exception)
|
||||
self.exception = parsing_exception
|
||||
|
||||
|
||||
class UnrecognizedType(FileSkipped):
|
||||
"""
|
||||
File doesn't contain descriptor data. This could either be due to its file
|
||||
type or because it doesn't conform to a recognizable descriptor type.
|
||||
|
||||
:param tuple mime_type: the (type, encoding) tuple provided by mimetypes.guess_type()
|
||||
"""
|
||||
|
||||
def __init__(self, mime_type):
|
||||
super(UnrecognizedType, self).__init__('Unrecognized mime type: %s (%s)' % mime_type)
|
||||
self.mime_type = mime_type
|
||||
|
||||
|
||||
class ReadFailed(FileSkipped):
|
||||
"""
|
||||
An IOError occurred while trying to read the file.
|
||||
|
||||
:param IOError exception: issue that arose when reading the file, **None** if
|
||||
this arose due to the file not being present
|
||||
"""
|
||||
|
||||
def __init__(self, read_exception):
|
||||
super(ReadFailed, self).__init__(read_exception)
|
||||
self.exception = read_exception
|
||||
|
||||
|
||||
class FileMissing(ReadFailed):
|
||||
'File does not exist.'
|
||||
|
||||
def __init__(self):
|
||||
super(FileMissing, self).__init__('File does not exist')
|
||||
|
||||
|
||||
def load_processed_files(path):
|
||||
"""
|
||||
Loads a dictionary of 'path => last modified timestamp' mappings, as
|
||||
persisted by :func:`~stem.descriptor.reader.save_processed_files`, from a
|
||||
file.
|
||||
|
||||
:param str path: location to load the processed files dictionary from
|
||||
|
||||
:returns: **dict** of 'path (**str**) => last modified unix timestamp
|
||||
(**int**)' mappings
|
||||
|
||||
:raises:
|
||||
* **IOError** if unable to read the file
|
||||
* **TypeError** if unable to parse the file's contents
|
||||
"""
|
||||
|
||||
processed_files = {}
|
||||
|
||||
with open(path) as input_file:
|
||||
for line in input_file.readlines():
|
||||
line = line.strip()
|
||||
|
||||
if not line:
|
||||
continue # skip blank lines
|
||||
|
||||
if ' ' not in line:
|
||||
raise TypeError('Malformed line: %s' % line)
|
||||
|
||||
path, timestamp = line.rsplit(' ', 1)
|
||||
|
||||
if not os.path.isabs(path):
|
||||
raise TypeError("'%s' is not an absolute path" % path)
|
||||
elif not timestamp.isdigit():
|
||||
raise TypeError("'%s' is not an integer timestamp" % timestamp)
|
||||
|
||||
processed_files[path] = int(timestamp)
|
||||
|
||||
return processed_files
|
||||
|
||||
|
||||
def save_processed_files(path, processed_files):
|
||||
"""
|
||||
Persists a dictionary of 'path => last modified timestamp' mappings (as
|
||||
provided by the DescriptorReader's
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.get_processed_files` method)
|
||||
so that they can be loaded later and applied to another
|
||||
:class:`~stem.descriptor.reader.DescriptorReader`.
|
||||
|
||||
:param str path: location to save the processed files dictionary to
|
||||
:param dict processed_files: 'path => last modified' mappings
|
||||
|
||||
:raises:
|
||||
* **IOError** if unable to write to the file
|
||||
* **TypeError** if processed_files is of the wrong type
|
||||
"""
|
||||
|
||||
# makes the parent directory if it doesn't already exist
|
||||
try:
|
||||
path_dir = os.path.dirname(path)
|
||||
|
||||
if not os.path.exists(path_dir):
|
||||
os.makedirs(path_dir)
|
||||
except OSError as exc:
|
||||
raise IOError(exc)
|
||||
|
||||
with open(path, 'w') as output_file:
|
||||
for path, timestamp in list(processed_files.items()):
|
||||
if not os.path.isabs(path):
|
||||
raise TypeError('Only absolute paths are acceptable: %s' % path)
|
||||
|
||||
output_file.write('%s %i\n' % (path, timestamp))
|
||||
|
||||
|
||||
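A quick round-trip sketch for the two helpers above (placeholder '/tmp' paths): the persisted format is one "absolute_path timestamp" pair per line, which is why relative paths are rejected.

from stem.descriptor.reader import load_processed_files, save_processed_files

# persist a listing, then read it back
save_processed_files('/tmp/processed_files', {'/tmp/descriptor_data': 1418380433})
print(load_processed_files('/tmp/processed_files'))  # {'/tmp/descriptor_data': 1418380433}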
class DescriptorReader(object):
|
||||
"""
|
||||
Iterator for the descriptor data on the local file system. This can process
|
||||
text files, tarball archives (gzip or bzip2), or recurse directories.
|
||||
|
||||
By default this limits the number of descriptors that we'll read ahead before
|
||||
waiting for our caller to fetch some of them. This is included to avoid
|
||||
unbounded memory usage.
|
||||
|
||||
Our persistence_path argument is a convenient method to persist the listing
|
||||
of files we have processed between runs, however it doesn't allow for error
|
||||
handling. If you want that then use the
|
||||
:func:`~stem.descriptor.reader.load_processed_files` and
|
||||
:func:`~stem.descriptor.reader.save_processed_files` functions instead.
|
||||
|
||||
:param str,list target: path or list of paths for files or directories to be read from
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param bool follow_links: determines if we'll follow symlinks when traversing
|
||||
directories (requires python 2.6)
|
||||
:param int buffer_size: descriptors we'll buffer before waiting for some to
|
||||
be read, this is unbounded if zero
|
||||
:param str persistence_path: if set we will load and save processed file
|
||||
listings from this path, errors are ignored
|
||||
:param stem.descriptor.__init__.DocumentHandler document_handler: method in
|
||||
which to parse :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
"""
|
||||
|
||||
def __init__(self, target, validate = False, follow_links = False, buffer_size = 100, persistence_path = None, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
|
||||
if isinstance(target, (bytes, str_type)):
|
||||
self._targets = [target]
|
||||
else:
|
||||
self._targets = target
|
||||
|
||||
# expand any relative paths we got
|
||||
|
||||
self._targets = list(map(os.path.abspath, self._targets))
|
||||
|
||||
self._validate = validate
|
||||
self._follow_links = follow_links
|
||||
self._persistence_path = persistence_path
|
||||
self._document_handler = document_handler
|
||||
self._kwargs = kwargs
|
||||
self._read_listeners = []
|
||||
self._skip_listeners = []
|
||||
self._processed_files = {}
|
||||
|
||||
self._reader_thread = None
|
||||
self._reader_thread_lock = threading.RLock()
|
||||
|
||||
self._iter_lock = threading.RLock()
|
||||
self._iter_notice = threading.Event()
|
||||
|
||||
self._is_stopped = threading.Event()
|
||||
self._is_stopped.set()
|
||||
|
||||
# Descriptors that we have read but not yet provided to the caller. A
|
||||
# FINISHED entry is used by the reading thread to indicate the end.
|
||||
|
||||
self._unreturned_descriptors = queue.Queue(buffer_size)
|
||||
|
||||
if self._persistence_path:
|
||||
try:
|
||||
processed_files = load_processed_files(self._persistence_path)
|
||||
self.set_processed_files(processed_files)
|
||||
except:
|
||||
pass
|
||||
|
||||
def get_processed_files(self):
|
||||
"""
|
||||
For each file that we have read descriptor data from this provides a
|
||||
mapping of the form...
|
||||
|
||||
::
|
||||
|
||||
absolute path (str) => last modified unix timestamp (int)
|
||||
|
||||
This includes entries set through the
|
||||
:func:`~stem.descriptor.reader.DescriptorReader.set_processed_files`
|
||||
method. Each run resets this to only the files that were present during
|
||||
that run.
|
||||
|
||||
:returns: **dict** with the absolute paths and unix timestamp for the last
|
||||
modified times of the files we have processed
|
||||
"""
|
||||
|
||||
# make sure that we only provide back absolute paths
|
||||
return dict((os.path.abspath(k), v) for (k, v) in list(self._processed_files.items()))
|
||||
|
||||
def set_processed_files(self, processed_files):
|
||||
"""
|
||||
Sets the listing of the files we have processed. Most often this is used
|
||||
with a newly created :class:`~stem.descriptor.reader.DescriptorReader` to
|
||||
pre-populate the listing of descriptor files that we have seen.
|
||||
|
||||
:param dict processed_files: mapping of absolute paths (**str**) to unix
|
||||
timestamps for the last modified time (**int**)
|
||||
"""
|
||||
|
||||
self._processed_files = dict(processed_files)
|
||||
|
||||
def register_read_listener(self, listener):
|
||||
"""
|
||||
Registers a listener for when files are read. This is executed prior to
|
||||
processing files. Listeners are expected to be of the form...
|
||||
|
||||
::
|
||||
|
||||
my_listener(path)
|
||||
|
||||
:param functor listener: functor to be notified when files are read
|
||||
"""
|
||||
|
||||
self._read_listeners.append(listener)
|
||||
|
||||
def register_skip_listener(self, listener):
|
||||
"""
|
||||
Registers a listener for files that are skipped. This listener is expected
|
||||
to be a functor of the form...
|
||||
|
||||
::
|
||||
|
||||
my_listener(path, exception)
|
||||
|
||||
:param functor listener: functor to be notified of files that are skipped
due to read errors or because they couldn't be parsed as valid descriptor data
|
||||
"""
|
||||
|
||||
self._skip_listeners.append(listener)
|
||||
|
||||
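A hypothetical listener for the method above (not part of stem): it logs each skipped file together with the FileSkipped subclass explaining why it was skipped.

from stem.descriptor.reader import DescriptorReader

def print_skipped(path, exception):
  # called for every file the reader cannot use
  print('skipped %s: %s' % (path, exception))

reader = DescriptorReader(['/tmp/descriptor_data'])
reader.register_skip_listener(print_skipped)

with reader:
  descriptors = list(reader)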
def get_buffered_descriptor_count(self):
|
||||
"""
|
||||
Provides the number of descriptors that are waiting to be iterated over.
|
||||
This is limited to the buffer_size that we were constructed with.
|
||||
|
||||
:returns: **int** for the estimated number of currently enqueued
|
||||
descriptors, this is not entirely reliable
|
||||
"""
|
||||
|
||||
return self._unreturned_descriptors.qsize()
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
Starts reading our descriptor files.
|
||||
|
||||
:raises: **ValueError** if we're already reading the descriptor files
|
||||
"""
|
||||
|
||||
with self._reader_thread_lock:
|
||||
if self._reader_thread:
|
||||
raise ValueError('Already running, you need to call stop() first')
|
||||
else:
|
||||
self._is_stopped.clear()
|
||||
self._reader_thread = threading.Thread(target = self._read_descriptor_files, name='Descriptor Reader')
|
||||
self._reader_thread.setDaemon(True)
|
||||
self._reader_thread.start()
|
||||
|
||||
def stop(self):
|
||||
"""
|
||||
Stops further reading of descriptor files.
|
||||
"""
|
||||
|
||||
with self._reader_thread_lock:
|
||||
self._is_stopped.set()
|
||||
self._iter_notice.set()
|
||||
|
||||
# clears our queue to unblock enqueue calls
|
||||
|
||||
try:
|
||||
while True:
|
||||
self._unreturned_descriptors.get_nowait()
|
||||
except queue.Empty:
|
||||
pass
|
||||
|
||||
self._reader_thread.join()
|
||||
self._reader_thread = None
|
||||
|
||||
if self._persistence_path:
|
||||
try:
|
||||
processed_files = self.get_processed_files()
|
||||
save_processed_files(self._persistence_path, processed_files)
|
||||
except:
|
||||
pass
|
||||
|
||||
def _read_descriptor_files(self):
|
||||
new_processed_files = {}
|
||||
remaining_files = list(self._targets)
|
||||
|
||||
while remaining_files and not self._is_stopped.is_set():
|
||||
target = remaining_files.pop(0)
|
||||
|
||||
if not os.path.exists(target):
|
||||
self._notify_skip_listeners(target, FileMissing())
|
||||
continue
|
||||
|
||||
if os.path.isdir(target):
|
||||
walker = os.walk(target, followlinks = self._follow_links)
|
||||
self._handle_walker(walker, new_processed_files)
|
||||
else:
|
||||
self._handle_file(target, new_processed_files)
|
||||
|
||||
self._processed_files = new_processed_files
|
||||
|
||||
if not self._is_stopped.is_set():
|
||||
self._unreturned_descriptors.put(FINISHED)
|
||||
|
||||
self._iter_notice.set()
|
||||
|
||||
def __iter__(self):
|
||||
with self._iter_lock:
|
||||
while not self._is_stopped.is_set():
|
||||
try:
|
||||
descriptor = self._unreturned_descriptors.get_nowait()
|
||||
|
||||
if descriptor == FINISHED:
|
||||
break
|
||||
else:
|
||||
yield descriptor
|
||||
except queue.Empty:
|
||||
self._iter_notice.wait()
|
||||
self._iter_notice.clear()
|
||||
|
||||
def _handle_walker(self, walker, new_processed_files):
|
||||
for root, _, files in walker:
|
||||
for filename in files:
|
||||
self._handle_file(os.path.join(root, filename), new_processed_files)
|
||||
|
||||
# this can take a while if, say, we're including the root directory
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
def _handle_file(self, target, new_processed_files):
|
||||
# This is a file. Register its last modified timestamp and check if
|
||||
# it's a file that we should skip.
|
||||
|
||||
try:
|
||||
last_modified = int(os.stat(target).st_mtime)
|
||||
last_used = self._processed_files.get(target)
|
||||
new_processed_files[target] = last_modified
|
||||
except OSError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
return
|
||||
|
||||
if last_used and last_used >= last_modified:
|
||||
self._notify_skip_listeners(target, AlreadyRead(last_modified, last_used))
|
||||
return
|
||||
|
||||
# Block devices and such are never descriptors, and can cause us to block
|
||||
# for quite a while so skipping anything that isn't a regular file.
|
||||
|
||||
if not os.path.isfile(target):
|
||||
return
|
||||
|
||||
# The mimetypes module only checks the file extension. To actually
|
||||
# check the content (like the 'file' command) we'd need something like
|
||||
# pymagic (https://github.com/cloudburst/pymagic).
|
||||
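# As an illustration, mimetypes.guess_type('descs.txt') returns ('text/plain', None)
# and is handled by _handle_descriptor_file() below, while 'descs.tar.bz2' returns
# ('application/x-tar', 'bzip2') and is routed to _handle_archive() via is_tarfile().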
|
||||
target_type = mimetypes.guess_type(target)
|
||||
|
||||
if target_type[0] in (None, 'text/plain'):
|
||||
# either '.txt' or an unknown type
|
||||
self._handle_descriptor_file(target, target_type)
|
||||
elif stem.util.system.is_tarfile(target):
|
||||
# handles gzip, bz2, and decompressed tarballs among others
|
||||
self._handle_archive(target)
|
||||
else:
|
||||
self._notify_skip_listeners(target, UnrecognizedType(target_type))
|
||||
|
||||
def _handle_descriptor_file(self, target, mime_type):
|
||||
try:
|
||||
self._notify_read_listeners(target)
|
||||
|
||||
with open(target, 'rb') as target_file:
|
||||
for desc in stem.descriptor.parse_file(target_file, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
self._unreturned_descriptors.put(desc)
|
||||
self._iter_notice.set()
|
||||
except TypeError as exc:
|
||||
self._notify_skip_listeners(target, UnrecognizedType(mime_type))
|
||||
except ValueError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
except IOError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
|
||||
def _handle_archive(self, target):
|
||||
# TODO: When dropping python 2.6 support go back to using 'with' for
|
||||
# tarfiles...
|
||||
#
|
||||
# http://bugs.python.org/issue7232
|
||||
|
||||
tar_file = None
|
||||
|
||||
try:
|
||||
self._notify_read_listeners(target)
|
||||
tar_file = tarfile.open(target)
|
||||
|
||||
for tar_entry in tar_file:
|
||||
if tar_entry.isfile():
|
||||
entry = tar_file.extractfile(tar_entry)
|
||||
|
||||
try:
|
||||
for desc in stem.descriptor.parse_file(entry, validate = self._validate, document_handler = self._document_handler, **self._kwargs):
|
||||
if self._is_stopped.is_set():
|
||||
return
|
||||
|
||||
desc._set_path(os.path.abspath(target))
|
||||
desc._set_archive_path(tar_entry.name)
|
||||
self._unreturned_descriptors.put(desc)
|
||||
self._iter_notice.set()
|
||||
except TypeError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
except ValueError as exc:
|
||||
self._notify_skip_listeners(target, ParsingFailure(exc))
|
||||
finally:
|
||||
entry.close()
|
||||
except IOError as exc:
|
||||
self._notify_skip_listeners(target, ReadFailed(exc))
|
||||
finally:
|
||||
if tar_file:
|
||||
tar_file.close()
|
||||
|
||||
def _notify_read_listeners(self, path):
|
||||
for listener in self._read_listeners:
|
||||
listener(path)
|
||||
|
||||
def _notify_skip_listeners(self, path, exception):
|
||||
for listener in self._skip_listeners:
|
||||
listener(path, exception)
|
||||
|
||||
def __enter__(self):
|
||||
self.start()
|
||||
return self
|
||||
|
||||
def __exit__(self, exit_type, value, traceback):
|
||||
self.stop()
|
||||
777
Shared/lib/python3.4/site-packages/stem/descriptor/remote.py
Normal file
@@ -0,0 +1,777 @@
# Copyright 2013-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Module for remotely retrieving descriptors from directory authorities and
|
||||
mirrors. This is most easily done through the
|
||||
:class:`~stem.descriptor.remote.DescriptorDownloader` class, which issues
|
||||
:class:`~stem.descriptor.remote.Query` instances to get you the descriptor
|
||||
content. For example...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor.remote import DescriptorDownloader
|
||||
|
||||
downloader = DescriptorDownloader(
|
||||
use_mirrors = True,
|
||||
timeout = 10,
|
||||
)
|
||||
|
||||
query = downloader.get_server_descriptors()
|
||||
|
||||
print 'Exit Relays:'
|
||||
|
||||
try:
|
||||
for desc in query.run():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print ' %s (%s)' % (desc.nickname, desc.fingerprint)
|
||||
|
||||
print
|
||||
print 'Query took %0.2f seconds' % query.runtime
|
||||
except Exception as exc:
|
||||
print 'Unable to retrieve the server descriptors: %s' % exc
|
||||
|
||||
If you don't care about errors then you can also simply iterate over the query
|
||||
itself...
|
||||
|
||||
::
|
||||
|
||||
for desc in downloader.get_server_descriptors():
|
||||
if desc.exit_policy.is_exiting_allowed():
|
||||
print ' %s (%s)' % (desc.nickname, desc.fingerprint)
|
||||
|
||||
::
|
||||
|
||||
get_authorities - Provides tor directory information.
|
||||
|
||||
DirectoryAuthority - Information about a tor directory authority.
|
||||
|
||||
Query - Asynchronous request to download tor descriptors
|
||||
|- start - issues the query if it isn't already running
|
||||
+- run - blocks until the request is finished and provides the results
|
||||
|
||||
DescriptorDownloader - Configurable class for issuing queries
|
||||
|- use_directory_mirrors - use directory mirrors to download future descriptors
|
||||
|- get_server_descriptors - provides present server descriptors
|
||||
|- get_extrainfo_descriptors - provides present extrainfo descriptors
|
||||
|- get_microdescriptors - provides present microdescriptors
|
||||
|- get_consensus - provides the present consensus or router status entries
|
||||
|- get_key_certificates - provides present authority key certificates
|
||||
+- query - request an arbitrary descriptor resource
|
||||
|
||||
.. versionadded:: 1.1.0
|
||||
|
||||
.. data:: MAX_FINGERPRINTS
|
||||
|
||||
Maximum number of descriptors that can be requested at a time by their
|
||||
fingerprints.
|
||||
|
||||
.. data:: MAX_MICRODESCRIPTOR_HASHES
|
||||
|
||||
Maximum number of microdescriptors that can be requested at a time by their
|
||||
hashes.
|
||||
"""
|
||||
|
||||
import io
|
||||
import random
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import zlib
|
||||
|
||||
try:
|
||||
import urllib.request as urllib
|
||||
except ImportError:
|
||||
import urllib2 as urllib
|
||||
|
||||
import stem.descriptor
|
||||
|
||||
from stem import Flag
|
||||
from stem.util import log
|
||||
|
||||
# Tor has a limited number of descriptors we can fetch explicitly by their
|
||||
# fingerprint or hashes due to a limit on the url length by squid proxies.
|
||||
|
||||
MAX_FINGERPRINTS = 96
|
||||
MAX_MICRODESCRIPTOR_HASHES = 92
|
||||
|
||||
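Because of these caps, a caller with many fingerprints has to split them into request-sized batches. An illustrative helper (not part of stem) under that assumption:

def in_batches(fingerprints, size = MAX_FINGERPRINTS):
  # yields slices no larger than what a single '/tor/server/fp/...' url can carry
  for i in range(0, len(fingerprints), size):
    yield fingerprints[i:i + size]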
# We commonly only want authorities that vote in the consensus, and hence have
|
||||
# a v3ident.
|
||||
|
||||
HAS_V3IDENT = lambda auth: auth.v3ident is not None
|
||||
|
||||
|
||||
def _guess_descriptor_type(resource):
|
||||
# Attempts to determine the descriptor type based on the resource url. This
|
||||
# raises a ValueError if the resource isn't recognized.
|
||||
|
||||
if resource.startswith('/tor/server/'):
|
||||
return 'server-descriptor 1.0'
|
||||
elif resource.startswith('/tor/extra/'):
|
||||
return 'extra-info 1.0'
|
||||
elif resource.startswith('/tor/micro/'):
|
||||
return 'microdescriptor 1.0'
|
||||
elif resource.startswith('/tor/status-vote/'):
|
||||
return 'network-status-consensus-3 1.0'
|
||||
elif resource.startswith('/tor/keys/'):
|
||||
return 'dir-key-certificate-3 1.0'
|
||||
else:
|
||||
raise ValueError("Unable to determine the descriptor type for '%s'" % resource)
|
||||
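# For example, given the mapping above:
#   _guess_descriptor_type('/tor/server/all.z') => 'server-descriptor 1.0'
#   _guess_descriptor_type('/tor/status-vote/current/consensus.z') => 'network-status-consensus-3 1.0'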
|
||||
|
||||
class Query(object):
|
||||
"""
|
||||
Asynchronous request for descriptor content from a directory authority or
|
||||
mirror. These can either be made through the
|
||||
:class:`~stem.descriptor.remote.DescriptorDownloader` or directly for more
|
||||
advanced usage.
|
||||
|
||||
To block on the response and get results either call
|
||||
:func:`~stem.descriptor.remote.Query.run` or iterate over the Query. The
|
||||
:func:`~stem.descriptor.remote.Query.run` method passes along any errors that
|
||||
arise...
|
||||
|
||||
::
|
||||
|
||||
from stem.descriptor.remote import Query
|
||||
|
||||
query = Query(
|
||||
'/tor/server/all.z',
|
||||
block = True,
|
||||
timeout = 30,
|
||||
)
|
||||
|
||||
print 'Current relays:'
|
||||
|
||||
if not query.error:
|
||||
for desc in query:
|
||||
print desc.fingerprint
|
||||
else:
|
||||
print 'Unable to retrieve the server descriptors: %s' % query.error
|
||||
|
||||
... while iterating fails silently...
|
||||
|
||||
::
|
||||
|
||||
print 'Current relays:'
|
||||
|
||||
for desc in Query('/tor/server/all.z', 'server-descriptor 1.0'):
|
||||
print desc.fingerprint
|
||||
|
||||
In either case exceptions are available via our 'error' attribute.
|
||||
|
||||
Tor provides quite a few different descriptor resources via its directory
|
||||
protocol (see section 4.2 and later of the `dir-spec
|
||||
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_).
|
||||
Commonly useful ones include...
|
||||
|
||||
===================================== ===========
|
||||
Resource Description
|
||||
===================================== ===========
|
||||
/tor/server/all.z all present server descriptors
|
||||
/tor/server/fp/<fp1>+<fp2>+<fp3>.z server descriptors with the given fingerprints
|
||||
/tor/extra/all.z all present extrainfo descriptors
|
||||
/tor/extra/fp/<fp1>+<fp2>+<fp3>.z extrainfo descriptors with the given fingerprints
|
||||
/tor/micro/d/<hash1>-<hash2>.z microdescriptors with the given hashes
|
||||
/tor/status-vote/current/consensus.z present consensus
|
||||
/tor/keys/all.z key certificates for the authorities
|
||||
/tor/keys/fp/<v3ident1>+<v3ident2>.z key certificates for specific authorities
|
||||
===================================== ===========
|
||||
|
||||
The '.z' suffix can be excluded to get a plaintext rather than compressed
|
||||
response. Compression is handled transparently, so this shouldn't matter to
|
||||
the caller.
|
||||
|
||||
:var str resource: resource being fetched, such as '/tor/server/all.z'
|
||||
:var str descriptor_type: type of descriptors being fetched (for options see
|
||||
:func:`~stem.descriptor.__init__.parse_file`), this is guessed from the
|
||||
resource if **None**
|
||||
|
||||
:var list endpoints: (address, dirport) tuples of the authority or mirror
|
||||
we're querying, this uses authorities if undefined
|
||||
:var int retries: number of times to attempt the request if downloading it
|
||||
fails
|
||||
:var bool fall_back_to_authority: when retrying, the last request is issued
to a directory authority if **True**
|
||||
|
||||
:var str content: downloaded descriptor content
|
||||
:var Exception error: exception if a problem occurred
|
||||
:var bool is_done: flag that indicates if our request has finished
|
||||
:var str download_url: last url used to download the descriptor, this is
|
||||
unset until we've actually made a download attempt
|
||||
|
||||
:var float start_time: unix timestamp when we first started running
|
||||
:var float timeout: duration before we'll time out our request
|
||||
:var float runtime: time our query took, this is **None** if it's not yet
|
||||
finished
|
||||
|
||||
:var bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:var stem.descriptor.__init__.DocumentHandler document_handler: method in
|
||||
which to parse a :class:`~stem.descriptor.networkstatus.NetworkStatusDocument`
|
||||
:var dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:param bool start: start making the request when constructed (default is **True**)
|
||||
:param bool block: only return after the request has been completed, this is
|
||||
the same as running **query.run(True)** (default is **False**)
|
||||
"""
|
||||
|
||||
def __init__(self, resource, descriptor_type = None, endpoints = None, retries = 2, fall_back_to_authority = False, timeout = None, start = True, block = False, validate = False, document_handler = stem.descriptor.DocumentHandler.ENTRIES, **kwargs):
|
||||
if not resource.startswith('/'):
|
||||
raise ValueError("Resources should start with a '/': %s" % resource)
|
||||
|
||||
self.resource = resource
|
||||
|
||||
if descriptor_type:
|
||||
self.descriptor_type = descriptor_type
|
||||
else:
|
||||
self.descriptor_type = _guess_descriptor_type(resource)
|
||||
|
||||
self.endpoints = endpoints if endpoints else []
|
||||
self.retries = retries
|
||||
self.fall_back_to_authority = fall_back_to_authority
|
||||
|
||||
self.content = None
|
||||
self.error = None
|
||||
self.is_done = False
|
||||
self.download_url = None
|
||||
|
||||
self.start_time = None
|
||||
self.timeout = timeout
|
||||
self.runtime = None
|
||||
|
||||
self.validate = validate
|
||||
self.document_handler = document_handler
|
||||
self.kwargs = kwargs
|
||||
|
||||
self._downloader_thread = None
|
||||
self._downloader_thread_lock = threading.RLock()
|
||||
|
||||
if start:
|
||||
self.start()
|
||||
|
||||
if block:
|
||||
self.run(True)
|
||||
|
||||
def start(self):
|
||||
"""
|
||||
Starts downloading the descriptors if we haven't started already.
|
||||
"""
|
||||
|
||||
with self._downloader_thread_lock:
|
||||
if self._downloader_thread is None:
|
||||
self._downloader_thread = threading.Thread(
|
||||
name = 'Descriptor Query',
|
||||
target = self._download_descriptors,
|
||||
args = (self.retries,)
|
||||
)
|
||||
|
||||
self._downloader_thread.setDaemon(True)
|
||||
self._downloader_thread.start()
|
||||
|
||||
def run(self, suppress = False):
|
||||
"""
|
||||
Blocks until our request is complete then provides the descriptors. If we
|
||||
haven't yet started our request then this does so.
|
||||
|
||||
:param bool suppress: avoids raising exceptions if **True**
|
||||
|
||||
:returns: list for the requested :class:`~stem.descriptor.__init__.Descriptor` instances
|
||||
|
||||
:raises:
|
||||
Using the iterator can fail with the following if **suppress** is
|
||||
**False**...
|
||||
|
||||
* **ValueError** if the descriptor contents is malformed
|
||||
* **socket.timeout** if our request timed out
|
||||
* **urllib2.URLError** for most request failures
|
||||
|
||||
Note that the urllib2 module may fail with other exception types, in
|
||||
which case we'll pass it along.
|
||||
"""
|
||||
|
||||
return list(self._run(suppress))
|
||||
|
||||
def _run(self, suppress):
|
||||
with self._downloader_thread_lock:
|
||||
self.start()
|
||||
self._downloader_thread.join()
|
||||
|
||||
if self.error:
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise self.error
|
||||
else:
|
||||
if self.content is None:
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise ValueError('BUG: _download_descriptors() finished without either results or an error')
|
||||
|
||||
try:
|
||||
results = stem.descriptor.parse_file(
|
||||
io.BytesIO(self.content),
|
||||
self.descriptor_type,
|
||||
validate = self.validate,
|
||||
document_handler = self.document_handler,
|
||||
**self.kwargs
|
||||
)
|
||||
|
||||
for desc in results:
|
||||
yield desc
|
||||
except ValueError as exc:
|
||||
self.error = exc # encountered a parsing error
|
||||
|
||||
if suppress:
|
||||
return
|
||||
|
||||
raise self.error
|
||||
|
||||
def __iter__(self):
|
||||
for desc in self._run(True):
|
||||
yield desc
|
||||
|
||||
def _pick_url(self, use_authority = False):
|
||||
"""
|
||||
Provides a url that can be queried. If we have multiple endpoints then one
|
||||
will be picked randomly.
|
||||
|
||||
:param bool use_authority: ignores our endpoints and uses a directory
|
||||
authority instead
|
||||
|
||||
:returns: **str** for the url being queried by this request
|
||||
"""
|
||||
|
||||
if use_authority or not self.endpoints:
|
||||
authority = random.choice(filter(HAS_V3IDENT, get_authorities().values()))
|
||||
address, dirport = authority.address, authority.dir_port
|
||||
else:
|
||||
address, dirport = random.choice(self.endpoints)
|
||||
|
||||
return 'http://%s:%i/%s' % (address, dirport, self.resource.lstrip('/'))
|
||||
|
||||
def _download_descriptors(self, retries):
|
||||
try:
|
||||
use_authority = retries == 0 and self.fall_back_to_authority
|
||||
self.download_url = self._pick_url(use_authority)
|
||||
|
||||
self.start_time = time.time()
|
||||
response = urllib.urlopen(self.download_url, timeout = self.timeout).read()
|
||||
|
||||
if self.download_url.endswith('.z'):
|
||||
response = zlib.decompress(response)
|
||||
|
||||
self.content = response.strip()
|
||||
|
||||
self.runtime = time.time() - self.start_time
|
||||
log.trace("Descriptors retrieved from '%s' in %0.2fs" % (self.download_url, self.runtime))
|
||||
except:
|
||||
exc = sys.exc_info()[1]
|
||||
|
||||
if retries > 0:
|
||||
log.debug("Unable to download descriptors from '%s' (%i retries remaining): %s" % (self.download_url, retries, exc))
|
||||
return self._download_descriptors(retries - 1)
|
||||
else:
|
||||
log.debug("Unable to download descriptors from '%s': %s" % (self.download_url, exc))
|
||||
self.error = exc
|
||||
finally:
|
||||
self.is_done = True
|
||||
|
||||
|
||||
class DescriptorDownloader(object):
|
||||
"""
|
||||
Configurable class that issues :class:`~stem.descriptor.remote.Query`
|
||||
instances on your behalf.
|
||||
|
||||
:param bool use_mirrors: downloads the present consensus and uses the directory
|
||||
mirrors to fetch future requests, this fails silently if the consensus
|
||||
cannot be downloaded
|
||||
:param default_args: default arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
"""
|
||||
|
||||
def __init__(self, use_mirrors = False, **default_args):
|
||||
self._default_args = default_args
|
||||
|
||||
authorities = filter(HAS_V3IDENT, get_authorities().values())
|
||||
self._endpoints = [(auth.address, auth.dir_port) for auth in authorities]
|
||||
|
||||
if use_mirrors:
|
||||
try:
|
||||
start_time = time.time()
|
||||
self.use_directory_mirrors()
|
||||
log.debug('Retrieved directory mirrors (took %0.2fs)' % (time.time() - start_time))
|
||||
except Exception as exc:
|
||||
log.debug('Unable to retrieve directory mirrors: %s' % exc)
|
||||
|
||||
def use_directory_mirrors(self):
|
||||
"""
|
||||
Downloads the present consensus and configures ourselves to use directory
|
||||
mirrors, in addition to authorities.
|
||||
|
||||
:returns: :class:`~stem.descriptor.networkstatus.NetworkStatusDocumentV3`
|
||||
from which we got the directory mirrors
|
||||
|
||||
:raises: **Exception** if unable to determine the directory mirrors
|
||||
"""
|
||||
|
||||
authorities = filter(HAS_V3IDENT, get_authorities().values())
|
||||
new_endpoints = set([(auth.address, auth.dir_port) for auth in authorities])
|
||||
|
||||
consensus = list(self.get_consensus(document_handler = stem.descriptor.DocumentHandler.DOCUMENT).run())[0]
|
||||
|
||||
for desc in consensus.routers.values():
|
||||
if Flag.V2DIR in desc.flags:
|
||||
new_endpoints.add((desc.address, desc.dir_port))
|
||||
|
||||
# we need our endpoints to be a list rather than set for random.choice()
|
||||
|
||||
self._endpoints = list(new_endpoints)
|
||||
|
||||
return consensus
|
||||
|
||||
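A short sketch of how the method above is meant to be used (assumes network access; not part of stem): one consensus fetch up front, after which later queries can be spread across mirrors instead of only the authorities.

from stem.descriptor.remote import DescriptorDownloader

downloader = DescriptorDownloader()
downloader.use_directory_mirrors()  # single consensus download to learn mirror endpoints

# subsequent requests may now go to a V2Dir mirror rather than an authority
server_descriptors = downloader.get_server_descriptors().run()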
def get_server_descriptors(self, fingerprints = None, **query_args):
|
||||
"""
|
||||
Provides the server descriptors with the given fingerprints. If no
|
||||
fingerprints are provided then this returns all descriptors in the present
|
||||
consensus.
|
||||
|
||||
:param str,list fingerprints: fingerprint or list of fingerprints to be
|
||||
retrieved, gets all descriptors if **None**
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the server descriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 96 descriptors by their
|
||||
fingerprints (this is due to a limit on the url length by squid proxies).
|
||||
"""
|
||||
|
||||
resource = '/tor/server/all.z'
|
||||
|
||||
if isinstance(fingerprints, str):
|
||||
fingerprints = [fingerprints]
|
||||
|
||||
if fingerprints:
|
||||
if len(fingerprints) > MAX_FINGERPRINTS:
|
||||
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/server/fp/%s.z' % '+'.join(fingerprints)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def get_extrainfo_descriptors(self, fingerprints = None, **query_args):
|
||||
"""
|
||||
Provides the extrainfo descriptors with the given fingerprints. If no
|
||||
fingerprints are provided then this returns all descriptors in the present
|
||||
consensus.
|
||||
|
||||
:param str,list fingerprints: fingerprint or list of fingerprints to be
|
||||
retrieved, gets all descriptors if **None**
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the extrainfo descriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 96 descriptors by their
|
||||
fingerprints (this is due to a limit on the url length by squid proxies).
|
||||
"""
|
||||
|
||||
resource = '/tor/extra/all.z'
|
||||
|
||||
if isinstance(fingerprints, str):
|
||||
fingerprints = [fingerprints]
|
||||
|
||||
if fingerprints:
|
||||
if len(fingerprints) > MAX_FINGERPRINTS:
|
||||
raise ValueError('Unable to request more than %i descriptors at a time by their fingerprints' % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/extra/fp/%s.z' % '+'.join(fingerprints)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def get_microdescriptors(self, hashes, **query_args):
|
||||
"""
|
||||
Provides the microdescriptors with the given hashes. To get these see the
|
||||
'microdescriptor_hashes' attribute of
|
||||
:class:`~stem.descriptor.router_status_entry.RouterStatusEntryV3`. Note
|
||||
that these are only provided via a microdescriptor consensus (such as
|
||||
'cached-microdesc-consensus' in your data directory).
|
||||
|
||||
:param str,list hashes: microdescriptor hash or list of hashes to be
|
||||
retrieved
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the microdescriptors
|
||||
|
||||
:raises: **ValueError** if we request more than 92 microdescriptors by their
|
||||
hashes (this is due to a limit on the url length by squid proxies).
|
||||
"""
|
||||
|
||||
if isinstance(hashes, str):
|
||||
hashes = [hashes]
|
||||
|
||||
if len(hashes) > MAX_MICRODESCRIPTOR_HASHES:
|
||||
raise ValueError('Unable to request more than %i microdescriptors at a time by their hashes' % MAX_MICRODESCRIPTOR_HASHES)
|
||||
|
||||
return self.query('/tor/micro/d/%s.z' % '-'.join(hashes), **query_args)
|
||||
|
||||
def get_consensus(self, authority_v3ident = None, **query_args):
|
||||
"""
|
||||
Provides the present router status entries.
|
||||
|
||||
:param str authority_v3ident: fingerprint of the authority key for which
|
||||
to get the consensus, see `'v3ident' in tor's config.c
|
||||
<https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_
|
||||
for the values.
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the router status
|
||||
entries
|
||||
"""
|
||||
|
||||
resource = '/tor/status-vote/current/consensus'
|
||||
|
||||
if authority_v3ident:
|
||||
resource += '/%s' % authority_v3ident
|
||||
|
||||
return self.query(resource + '.z', **query_args)
|
||||
|
||||
def get_vote(self, authority, **query_args):
|
||||
"""
|
||||
Provides the present vote for a given directory authority.
|
||||
|
||||
:param stem.descriptor.remote.DirectoryAuthority authority: authority for which to retrieve a vote for
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the router status
|
||||
entries
|
||||
"""
|
||||
|
||||
resource = '/tor/status-vote/current/authority'
|
||||
|
||||
if 'endpoint' not in query_args:
|
||||
query_args['endpoints'] = [(authority.address, authority.dir_port)]
|
||||
|
||||
return self.query(resource + '.z', **query_args)
|
||||
|
||||
def get_key_certificates(self, authority_v3idents = None, **query_args):
|
||||
"""
|
||||
Provides the key certificates for authorities with the given fingerprints.
|
||||
If no fingerprints are provided then this returns all present key
|
||||
certificates.
|
||||
|
||||
:param str authority_v3idents: fingerprint or list of fingerprints of the
|
||||
authority keys, see `'v3ident' in tor's config.c
|
||||
<https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_
|
||||
for the values.
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the key certificates
|
||||
|
||||
:raises: **ValueError** if we request more than 96 key certificates by
|
||||
their identity fingerprints (this is due to a limit on the url length by
|
||||
squid proxies).
|
||||
"""
|
||||
|
||||
resource = '/tor/keys/all.z'
|
||||
|
||||
if isinstance(authority_v3idents, str):
|
||||
authority_v3idents = [authority_v3idents]
|
||||
|
||||
if authority_v3idents:
|
||||
if len(authority_v3idents) > MAX_FINGERPRINTS:
|
||||
raise ValueError('Unable to request more than %i key certificates at a time by their identity fingerprints' % MAX_FINGERPRINTS)
|
||||
|
||||
resource = '/tor/keys/fp/%s.z' % '+'.join(authority_v3idents)
|
||||
|
||||
return self.query(resource, **query_args)
|
||||
|
||||
def query(self, resource, **query_args):
|
||||
"""
|
||||
Issues a request for the given resource.
|
||||
|
||||
:param str resource: resource being fetched, such as '/tor/server/all.z'
|
||||
:param query_args: additional arguments for the
|
||||
:class:`~stem.descriptor.remote.Query` constructor
|
||||
|
||||
:returns: :class:`~stem.descriptor.remote.Query` for the descriptors
|
||||
|
||||
:raises: **ValueError** if resource is clearly invalid or the descriptor
|
||||
type can't be determined when 'descriptor_type' is **None**
|
||||
"""
|
||||
|
||||
args = dict(self._default_args)
|
||||
args.update(query_args)
|
||||
|
||||
if 'endpoints' not in args:
|
||||
args['endpoints'] = self._endpoints
|
||||
|
||||
if 'fall_back_to_authority' not in args:
|
||||
args['fall_back_to_authority'] = True
|
||||
|
||||
return Query(
|
||||
resource,
|
||||
**args
|
||||
)
|
||||
|
||||
|
||||
class DirectoryAuthority(object):
|
||||
"""
|
||||
Tor directory authority, a special type of relay `hardcoded into tor
|
||||
<https://gitweb.torproject.org/tor.git/tree/src/or/config.c#n819>`_
|
||||
that enumerates the other relays within the network.
|
||||
|
||||
At a very high level tor works as follows...
|
||||
|
||||
1. A volunteer starts up a new tor relay, during which it sends a `server
|
||||
descriptor <server_descriptor.html>`_ to each of the directory
|
||||
authorities.
|
||||
|
||||
2. Each hour the directory authorities make a `vote <networkstatus.html>`_
|
||||
that says who they think the active relays are in the network and some
|
||||
attributes about them.
|
||||
|
||||
3. The directory authorities send each other their votes, and compile that
|
||||
into the `consensus <networkstatus.html>`_. This document is very similar
|
||||
to the votes, the only difference being that the majority of the
|
||||
authorities agree upon and sign this document. The individual relay entries
in the vote or consensus are called `router status entries
|
||||
<router_status_entry.html>`_.
|
||||
|
||||
4. Tor clients (people using the service) download the consensus from one of
|
||||
the authorities or a mirror to determine the active relays within the
|
||||
network. They in turn use this to construct their circuits and use the
|
||||
network.
|
||||
|
||||
.. versionchanged:: 1.3.0
|
||||
Added the is_bandwidth_authority attribute.
|
||||
|
||||
:var str nickname: nickname of the authority
|
||||
:var str address: IP address of the authority, currently they're all IPv4 but
|
||||
this may not always be the case
|
||||
:var int or_port: port on which the relay services relay traffic
|
||||
:var int dir_port: port on which directory information is available
|
||||
:var str fingerprint: relay fingerprint
|
||||
:var str v3ident: identity key fingerprint used to sign votes and consensus
|
||||
"""
|
||||
|
||||
def __init__(self, nickname = None, address = None, or_port = None, dir_port = None, is_bandwidth_authority = False, fingerprint = None, v3ident = None):
|
||||
self.nickname = nickname
|
||||
self.address = address
|
||||
self.or_port = or_port
|
||||
self.dir_port = dir_port
|
||||
self.is_bandwidth_authority = is_bandwidth_authority
|
||||
self.fingerprint = fingerprint
|
||||
self.v3ident = v3ident
|
||||
|
||||
|
||||
DIRECTORY_AUTHORITIES = {
|
||||
'moria1': DirectoryAuthority(
|
||||
nickname = 'moria1',
|
||||
address = '128.31.0.39',
|
||||
or_port = 9101,
|
||||
dir_port = 9131,
|
||||
is_bandwidth_authority = True,
|
||||
fingerprint = '9695DFC35FFEB861329B9F1AB04C46397020CE31',
|
||||
v3ident = 'D586D18309DED4CD6D57C18FDB97EFA96D330566',
|
||||
),
|
||||
'tor26': DirectoryAuthority(
|
||||
nickname = 'tor26',
|
||||
address = '86.59.21.38',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = True,
|
||||
fingerprint = '847B1F850344D7876491A54892F904934E4EB85D',
|
||||
v3ident = '14C131DFC5C6F93646BE72FA1401C02A8DF2E8B4',
|
||||
),
|
||||
'dizum': DirectoryAuthority(
|
||||
nickname = 'dizum',
|
||||
address = '194.109.206.212',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = False,
|
||||
fingerprint = '7EA6EAD6FD83083C538F44038BBFA077587DD755',
|
||||
v3ident = 'E8A9C45EDE6D711294FADF8E7951F4DE6CA56B58',
|
||||
),
|
||||
'Tonga': DirectoryAuthority(
|
||||
nickname = 'Tonga',
|
||||
address = '82.94.251.203',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = False,
|
||||
fingerprint = '4A0CCD2DDC7995083D73F5D667100C8A5831F16D',
|
||||
v3ident = None, # does not vote in the consensus
|
||||
),
|
||||
'gabelmoo': DirectoryAuthority(
|
||||
nickname = 'gabelmoo',
|
||||
address = '131.188.40.189',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = True,
|
||||
fingerprint = 'F2044413DAC2E02E3D6BCF4735A19BCA1DE97281',
|
||||
v3ident = 'ED03BB616EB2F60BEC80151114BB25CEF515B226',
|
||||
),
|
||||
'dannenberg': DirectoryAuthority(
|
||||
nickname = 'dannenberg',
|
||||
address = '193.23.244.244',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = False,
|
||||
fingerprint = '7BE683E65D48141321C5ED92F075C55364AC7123',
|
||||
v3ident = '585769C78764D58426B8B52B6651A5A71137189A',
|
||||
),
|
||||
'urras': DirectoryAuthority(
|
||||
nickname = 'urras',
|
||||
address = '208.83.223.34',
|
||||
or_port = 80,
|
||||
dir_port = 443,
|
||||
is_bandwidth_authority = False,
|
||||
fingerprint = '0AD3FA884D18F89EEA2D89C019379E0E7FD94417',
|
||||
v3ident = '80550987E1D626E3EBA5E5E75A458DE0626D088C',
|
||||
),
|
||||
'maatuska': DirectoryAuthority(
|
||||
nickname = 'maatuska',
|
||||
address = '171.25.193.9',
|
||||
or_port = 80,
|
||||
dir_port = 443,
|
||||
is_bandwidth_authority = True,
|
||||
fingerprint = 'BD6A829255CB08E66FBE7D3748363586E46B3810',
|
||||
v3ident = '49015F787433103580E3B66A1707A00E60F2D15B',
|
||||
),
|
||||
'Faravahar': DirectoryAuthority(
|
||||
nickname = 'Faravahar',
|
||||
address = '154.35.175.225',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = False,
|
||||
fingerprint = 'CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC',
|
||||
v3ident = 'EFCBE720AB3A82B99F9E953CD5BF50F7EEFC7B97',
|
||||
),
|
||||
'longclaw': DirectoryAuthority(
|
||||
nickname = 'longclaw',
|
||||
address = '199.254.238.52',
|
||||
or_port = 443,
|
||||
dir_port = 80,
|
||||
is_bandwidth_authority = True,
|
||||
fingerprint = '74A910646BCEEFBCD2E874FC1DC997430F968145',
|
||||
v3ident = '23D15D965BC35114467363C165C4F724B64B4F66',
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def get_authorities():
|
||||
"""
|
||||
Provides the Tor directory authority information as of **Tor on 11/21/14**.
|
||||
The directory information is hardcoded into Tor and occasionally changes, so the
|
||||
information this provides might not necessarily match your version of tor.
|
||||
|
||||
:returns: dict of str nicknames to :class:`~stem.descriptor.remote.DirectoryAuthority` instances
|
||||
"""
|
||||
|
||||
return dict(DIRECTORY_AUTHORITIES)
|
||||
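A brief usage sketch for get_authorities() together with the downloader above (assumes network access; not part of stem): fetch moria1's current vote directly from that authority and list the relays it contains.

from stem.descriptor.remote import DescriptorDownloader, get_authorities

downloader = DescriptorDownloader()
vote_query = downloader.get_vote(get_authorities()['moria1'])

for status_entry in vote_query.run():
  print(status_entry.fingerprint)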
@@ -0,0 +1,625 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information

"""
Parsing for router status entries, the information for individual routers
within a network status document. This information is provided from a few
sources...

* control port via 'GETINFO ns/\*' and 'GETINFO md/\*' queries
* router entries in a network status document, like the cached-consensus

**Module Overview:**

::

  RouterStatusEntry - Common parent for router status entries
    |- RouterStatusEntryV2 - Entry for a network status v2 document
    |- RouterStatusEntryV3 - Entry for a network status v3 document
    +- RouterStatusEntryMicroV3 - Entry for a microdescriptor flavored v3 document
"""

import base64
import binascii

import stem.exit_policy
import stem.prereq
import stem.util.str_tools

from stem.descriptor import (
  KEYWORD_LINE,
  Descriptor,
  _value,
  _values,
  _get_descriptor_components,
  _read_until_keywords,
)


def _parse_file(document_file, validate, entry_class, entry_keyword = 'r', start_position = None, end_position = None, section_end_keywords = (), extra_args = ()):
|
||||
"""
|
||||
Reads a range of the document_file containing some number of entry_class
|
||||
instances. We delimit the entry_class entries by the keyword on their
|
||||
first line (entry_keyword). When finished the document is left at the
|
||||
end_position.
|
||||
|
||||
Either an end_position or section_end_keywords must be provided.
|
||||
|
||||
:param file document_file: file with network status document content
|
||||
:param bool validate: checks the validity of the document's contents if
|
||||
**True**, skips these checks otherwise
|
||||
:param class entry_class: class to construct instance for
|
||||
:param str entry_keyword: first keyword for the entry instances
|
||||
:param int start_position: start of the section, default is the current position
|
||||
:param int end_position: end of the section
|
||||
:param tuple section_end_keywords: keyword(s) that delimit the end of the
|
||||
section if no end_position was provided
|
||||
:param tuple extra_args: extra arguments for the entry_class (after the
|
||||
content and validate flag)
|
||||
|
||||
:returns: iterator over entry_class instances
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is **True**
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
if start_position:
|
||||
document_file.seek(start_position)
|
||||
else:
|
||||
start_position = document_file.tell()
|
||||
|
||||
# check if we're starting at the end of the section (ie, there's no entries to read)
|
||||
if section_end_keywords:
|
||||
first_keyword = None
|
||||
line_match = KEYWORD_LINE.match(stem.util.str_tools._to_unicode(document_file.readline()))
|
||||
|
||||
if line_match:
|
||||
first_keyword = line_match.groups()[0]
|
||||
|
||||
document_file.seek(start_position)
|
||||
|
||||
if first_keyword in section_end_keywords:
|
||||
return
|
||||
|
||||
while end_position is None or document_file.tell() < end_position:
|
||||
desc_lines, ending_keyword = _read_until_keywords(
|
||||
(entry_keyword,) + section_end_keywords,
|
||||
document_file,
|
||||
ignore_first = True,
|
||||
end_position = end_position,
|
||||
include_ending_keyword = True
|
||||
)
|
||||
|
||||
desc_content = bytes.join(b'', desc_lines)
|
||||
|
||||
if desc_content:
|
||||
yield entry_class(desc_content, validate, *extra_args)
|
||||
|
||||
# check if we stopped at the end of the section
|
||||
if ending_keyword in section_end_keywords:
|
||||
break
|
||||
else:
|
||||
break
|
||||
|
||||
|
||||
def _parse_r_line(descriptor, entries):
|
||||
# Parses a RouterStatusEntry's 'r' line. They're very nearly identical for
|
||||
# all current entry types (v2, v3, and microdescriptor v3) with one little
|
||||
# wrinkle: only the microdescriptor flavor excludes a 'digest' field.
|
||||
#
|
||||
# For v2 and v3 router status entries:
|
||||
# "r" nickname identity digest publication IP ORPort DirPort
|
||||
# example: r mauer BD7xbfsCFku3+tgybEZsg8Yjhvw itcuKQ6PuPLJ7m/Oi928WjO2j8g 2012-06-22 13:19:32 80.101.105.103 9001 0
|
||||
#
|
||||
# For v3 microdescriptor router status entries:
|
||||
# "r" nickname identity publication IP ORPort DirPort
|
||||
# example: r Konata ARIJF2zbqirB9IwsW0mQznccWww 2012-09-24 13:40:40 69.64.48.168 9001 9030
|
||||
|
||||
value = _value('r', entries)
|
||||
include_digest = not isinstance(descriptor, RouterStatusEntryMicroV3)
|
||||
|
||||
r_comp = value.split(' ')
|
||||
|
||||
# inject a None for the digest to normalize the field positioning
|
||||
if not include_digest:
|
||||
r_comp.insert(2, None)
|
||||
|
||||
if len(r_comp) < 8:
|
||||
expected_field_count = 'eight' if include_digest else 'seven'
|
||||
raise ValueError("%s 'r' line must have %s values: r %s" % (descriptor._name(), expected_field_count, value))
|
||||
|
||||
if not stem.util.tor_tools.is_valid_nickname(r_comp[0]):
|
||||
raise ValueError("%s nickname isn't valid: %s" % (descriptor._name(), r_comp[0]))
|
||||
elif not stem.util.connection.is_valid_ipv4_address(r_comp[5]):
|
||||
raise ValueError("%s address isn't a valid IPv4 address: %s" % (descriptor._name(), r_comp[5]))
|
||||
elif not stem.util.connection.is_valid_port(r_comp[6]):
|
||||
raise ValueError('%s ORPort is invalid: %s' % (descriptor._name(), r_comp[6]))
|
||||
elif not stem.util.connection.is_valid_port(r_comp[7], allow_zero = True):
|
||||
raise ValueError('%s DirPort is invalid: %s' % (descriptor._name(), r_comp[7]))
|
||||
|
||||
descriptor.nickname = r_comp[0]
|
||||
descriptor.fingerprint = _base64_to_hex(r_comp[1])
|
||||
|
||||
if include_digest:
|
||||
descriptor.digest = _base64_to_hex(r_comp[2])
|
||||
|
||||
descriptor.address = r_comp[5]
|
||||
descriptor.or_port = int(r_comp[6])
|
||||
descriptor.dir_port = None if r_comp[7] == '0' else int(r_comp[7])
|
||||
|
||||
try:
|
||||
published = '%s %s' % (r_comp[3], r_comp[4])
|
||||
descriptor.published = stem.util.str_tools._parse_timestamp(published)
|
||||
except ValueError:
|
||||
raise ValueError("Publication time time wasn't parsable: r %s" % value)
|
||||
|
||||
|
||||
def _parse_a_line(descriptor, entries):
|
||||
# "a" SP address ":" portlist
|
||||
# example: a [2001:888:2133:0:82:94:251:204]:9001
|
||||
|
||||
or_addresses = []
|
||||
|
||||
for value in _values('a', entries):
|
||||
if ':' not in value:
|
||||
raise ValueError("%s 'a' line must be of the form '[address]:[ports]': a %s" % (descriptor._name(), value))
|
||||
|
||||
address, port = value.rsplit(':', 1)
|
||||
is_ipv6 = address.startswith('[') and address.endswith(']')
|
||||
|
||||
if is_ipv6:
|
||||
address = address[1:-1] # remove brackets
|
||||
|
||||
if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
|
||||
(is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
|
||||
raise ValueError("%s 'a' line must start with an IPv6 address: a %s" % (descriptor._name(), value))
|
||||
|
||||
if stem.util.connection.is_valid_port(port):
|
||||
or_addresses.append((address, int(port), is_ipv6))
|
||||
else:
|
||||
raise ValueError("%s 'a' line had an invalid port (%s): a %s" % (descriptor._name(), port, value))
|
||||
|
||||
descriptor.or_addresses = or_addresses
|
||||
|
||||
|
||||
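# Illustrative sketch (not part of the module): splitting the example 'a' line
# value from the comment above into its address and port.
#
#   >>> value = '[2001:888:2133:0:82:94:251:204]:9001'
#   >>> address, port = value.rsplit(':', 1)
#   >>> address[1:-1], int(port)
#   ('2001:888:2133:0:82:94:251:204', 9001)
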
def _parse_s_line(descriptor, entries):
|
||||
# "s" Flags
|
||||
# example: s Named Running Stable Valid
|
||||
|
||||
value = _value('s', entries)
|
||||
flags = [] if value == '' else value.split(' ')
|
||||
descriptor.flags = flags
|
||||
|
||||
for flag in flags:
|
||||
if flags.count(flag) > 1:
|
||||
raise ValueError('%s had duplicate flags: s %s' % (descriptor._name(), value))
|
||||
elif flag == '':
|
||||
raise ValueError("%s had extra whitespace on its 's' line: s %s" % (descriptor._name(), value))
|
||||
|
||||
|
||||
def _parse_v_line(descriptor, entries):
|
||||
# "v" version
|
||||
# example: v Tor 0.2.2.35
|
||||
#
|
||||
# The spec says that if this starts with "Tor " then what follows is a
|
||||
# tor version. If not then it has "upgraded to a more sophisticated
|
||||
# protocol versioning system".
|
||||
|
||||
value = _value('v', entries)
|
||||
descriptor.version_line = value
|
||||
|
||||
if value.startswith('Tor '):
|
||||
try:
|
||||
descriptor.version = stem.version._get_version(value[4:])
|
||||
except ValueError as exc:
|
||||
raise ValueError('%s has a malformed tor version (%s): v %s' % (descriptor._name(), exc, value))
|
||||
|
||||
|
||||
def _parse_w_line(descriptor, entries):
|
||||
# "w" "Bandwidth=" INT ["Measured=" INT] ["Unmeasured=1"]
|
||||
# example: w Bandwidth=7980
|
||||
|
||||
value = _value('w', entries)
|
||||
w_comp = value.split(' ')
|
||||
|
||||
if len(w_comp) < 1:
|
||||
raise ValueError("%s 'w' line is blank: w %s" % (descriptor._name(), value))
|
||||
elif not w_comp[0].startswith('Bandwidth='):
|
||||
raise ValueError("%s 'w' line needs to start with a 'Bandwidth=' entry: w %s" % (descriptor._name(), value))
|
||||
|
||||
for w_entry in w_comp:
|
||||
if '=' in w_entry:
|
||||
w_key, w_value = w_entry.split('=', 1)
|
||||
else:
|
||||
w_key, w_value = w_entry, None
|
||||
|
||||
if w_key == 'Bandwidth':
|
||||
if not (w_value and w_value.isdigit()):
|
||||
raise ValueError("%s 'Bandwidth=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
|
||||
|
||||
descriptor.bandwidth = int(w_value)
|
||||
elif w_key == 'Measured':
|
||||
if not (w_value and w_value.isdigit()):
|
||||
raise ValueError("%s 'Measured=' entry needs to have a numeric value: w %s" % (descriptor._name(), value))
|
||||
|
||||
descriptor.measured = int(w_value)
|
||||
elif w_key == 'Unmeasured':
|
||||
if w_value != '1':
|
||||
raise ValueError("%s 'Unmeasured=' should only have the value of '1': w %s" % (descriptor._name(), value))
|
||||
|
||||
descriptor.is_unmeasured = True
|
||||
else:
|
||||
descriptor.unrecognized_bandwidth_entries.append(w_entry)
|
||||
|
||||
|
||||
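# Illustrative sketch (not part of the module): the 'w' line is a series of
# key=value entries, which split apart like so...
#
#   >>> line = 'Bandwidth=7980 Measured=5120 Unmeasured=1'
#   >>> sorted(entry.split('=', 1) for entry in line.split(' '))
#   [['Bandwidth', '7980'], ['Measured', '5120'], ['Unmeasured', '1']]
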
def _parse_p_line(descriptor, entries):
|
||||
# "p" ("accept" / "reject") PortList
|
||||
# p reject 1-65535
|
||||
# example: p accept 80,110,143,443,993,995,6660-6669,6697,7000-7001
|
||||
|
||||
value = _value('p', entries)
|
||||
|
||||
try:
|
||||
descriptor.exit_policy = stem.exit_policy.MicroExitPolicy(value)
|
||||
except ValueError as exc:
|
||||
raise ValueError('%s exit policy is malformed (%s): p %s' % (descriptor._name(), exc, value))
|
||||
|
||||
|
||||
def _parse_m_line(descriptor, entries):
|
||||
# "m" methods 1*(algorithm "=" digest)
|
||||
# example: m 8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs
|
||||
|
||||
all_hashes = []
|
||||
|
||||
for value in _values('m', entries):
|
||||
m_comp = value.split(' ')
|
||||
|
||||
if not (descriptor.document and descriptor.document.is_vote):
|
||||
vote_status = 'consensus' if descriptor.document else '<undefined document>'
|
||||
raise ValueError("%s 'm' line should only appear in votes (appeared in a %s): m %s" % (descriptor._name(), vote_status, value))
|
||||
elif len(m_comp) < 1:
|
||||
raise ValueError("%s 'm' line needs to start with a series of methods: m %s" % (descriptor._name(), value))
|
||||
|
||||
try:
|
||||
methods = [int(entry) for entry in m_comp[0].split(',')]
|
||||
except ValueError:
|
||||
raise ValueError('%s microdescriptor methods should be a series of comma separated integers: m %s' % (descriptor._name(), value))
|
||||
|
||||
hashes = {}
|
||||
|
||||
for entry in m_comp[1:]:
|
||||
if '=' not in entry:
|
||||
raise ValueError("%s can only have a series of 'algorithm=digest' mappings after the methods: m %s" % (descriptor._name(), value))
|
||||
|
||||
hash_name, digest = entry.split('=', 1)
|
||||
hashes[hash_name] = digest
|
||||
|
||||
all_hashes.append((methods, hashes))
|
||||
|
||||
descriptor.microdescriptor_hashes = all_hashes
|
||||
|
||||
|
||||
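# Illustrative sketch (not part of the module): breaking the example vote 'm'
# line from the comment above into its consensus methods and digest mappings.
#
#   >>> value = '8,9,10,11,12 sha256=g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs'
#   >>> m_comp = value.split(' ')
#   >>> [int(entry) for entry in m_comp[0].split(',')]
#   [8, 9, 10, 11, 12]
#   >>> dict(entry.split('=', 1) for entry in m_comp[1:])
#   {'sha256': 'g1vx9si329muxV3tquWIXXySNOIwRGMeAESKs/v4DWs'}
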
def _parse_microdescriptor_m_line(descriptor, entries):
|
||||
# "m" digest
|
||||
# example: m aiUklwBrua82obG5AsTX+iEpkjQA2+AQHxZ7GwMfY70
|
||||
|
||||
descriptor.digest = _base64_to_hex(_value('m', entries), check_if_fingerprint = False)
|
||||
|
||||
|
||||
def _base64_to_hex(identity, check_if_fingerprint = True):
|
||||
"""
|
||||
Decodes a base64 value to hex. For example...
|
||||
|
||||
::
|
||||
|
||||
>>> _base64_to_hex('p1aag7VwarGxqctS7/fS0y5FU+s')
|
||||
'A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB'
|
||||
|
||||
:param str identity: encoded fingerprint from the consensus
|
||||
:param bool check_if_fingerprint: asserts that the result is a fingerprint if **True**
|
||||
|
||||
:returns: **str** with the uppercase hex encoding of the relay's fingerprint
|
||||
|
||||
:raises: **ValueError** if the result isn't a valid fingerprint
|
||||
"""
|
||||
|
||||
# trailing equal signs were stripped from the identity
|
||||
missing_padding = len(identity) % 4
|
||||
identity += '=' * missing_padding
|
||||
|
||||
try:
|
||||
identity_decoded = base64.b64decode(stem.util.str_tools._to_bytes(identity))
|
||||
except (TypeError, binascii.Error):
|
||||
raise ValueError("Unable to decode identity string '%s'" % identity)
|
||||
|
||||
fingerprint = binascii.b2a_hex(identity_decoded).upper()
|
||||
|
||||
if stem.prereq.is_python_3():
|
||||
fingerprint = stem.util.str_tools._to_unicode(fingerprint)
|
||||
|
||||
if check_if_fingerprint:
|
||||
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
|
||||
raise ValueError("Decoded '%s' to be '%s', which isn't a valid fingerprint" % (identity, fingerprint))
|
||||
|
||||
return fingerprint
|
||||
|
||||
|
||||
class RouterStatusEntry(Descriptor):
|
||||
"""
|
||||
Information about an individual router stored within a network status
|
||||
document. This is the common parent for concrete status entry types.
|
||||
|
||||
:var stem.descriptor.networkstatus.NetworkStatusDocument document: **\*** document that this descriptor came from
|
||||
|
||||
:var str nickname: **\*** router's nickname
|
||||
:var str fingerprint: **\*** router's fingerprint
|
||||
:var datetime published: **\*** router's publication
|
||||
:var str address: **\*** router's IP address
|
||||
:var int or_port: **\*** router's ORPort
|
||||
:var int dir_port: **\*** router's DirPort
|
||||
|
||||
:var list flags: **\*** list of :data:`~stem.Flag` associated with the relay
|
||||
|
||||
:var stem.version.Version version: parsed version of tor, this is **None** if
|
||||
the relay's using a new versioning scheme
|
||||
:var str version_line: versioning information reported by the relay
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {
|
||||
'nickname': (None, _parse_r_line),
|
||||
'fingerprint': (None, _parse_r_line),
|
||||
'published': (None, _parse_r_line),
|
||||
'address': (None, _parse_r_line),
|
||||
'or_port': (None, _parse_r_line),
|
||||
'dir_port': (None, _parse_r_line),
|
||||
|
||||
'flags': (None, _parse_s_line),
|
||||
|
||||
'version_line': (None, _parse_v_line),
|
||||
'version': (None, _parse_v_line),
|
||||
}
|
||||
|
||||
PARSER_FOR_LINE = {
|
||||
'r': _parse_r_line,
|
||||
's': _parse_s_line,
|
||||
'v': _parse_v_line,
|
||||
}
|
||||
|
||||
def __init__(self, content, validate = False, document = None):
|
||||
"""
|
||||
Parse a router descriptor in a network status document.
|
||||
|
||||
:param str content: router descriptor content to be parsed
|
||||
:param NetworkStatusDocument document: document this descriptor came from
|
||||
:param bool validate: checks the validity of the content if **True**, skips
|
||||
these checks otherwise
|
||||
|
||||
:raises: **ValueError** if the descriptor data is invalid
|
||||
"""
|
||||
|
||||
super(RouterStatusEntry, self).__init__(content, lazy_load = not validate)
|
||||
self.document = document
|
||||
entries = _get_descriptor_components(content, validate)
|
||||
|
||||
if validate:
|
||||
for keyword in self._required_fields():
|
||||
if keyword not in entries:
|
||||
raise ValueError("%s must have a '%s' line:\n%s" % (self._name(True), keyword, str(self)))
|
||||
|
||||
for keyword in self._single_fields():
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("%s can only have a single '%s' line, got %i:\n%s" % (self._name(True), keyword, len(entries[keyword]), str(self)))
|
||||
|
||||
if 'r' != list(entries.keys())[0]:
|
||||
raise ValueError("%s are expected to start with a 'r' line:\n%s" % (self._name(True), str(self)))
|
||||
|
||||
self._parse(entries, validate)
|
||||
else:
|
||||
self._entries = entries
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
"""
|
||||
Name for this descriptor type.
|
||||
"""
|
||||
|
||||
return 'Router status entries' if is_plural else 'Router status entry'
|
||||
|
||||
def _required_fields(self):
|
||||
"""
|
||||
Provides lines that must appear in the descriptor.
|
||||
"""
|
||||
|
||||
return ()
|
||||
|
||||
def _single_fields(self):
|
||||
"""
|
||||
Provides lines that can only appear in the descriptor once.
|
||||
"""
|
||||
|
||||
return ()
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntry):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
class RouterStatusEntryV2(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a version 2 network
|
||||
status document.
|
||||
|
||||
:var str digest: **\*** router's upper-case hex digest
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
|
||||
'digest': (None, _parse_r_line),
|
||||
})
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
return 'Router status entries (v2)' if is_plural else 'Router status entry (v2)'
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r',)
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryV2):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
class RouterStatusEntryV3(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a version 3 network
|
||||
status document.
|
||||
|
||||
:var list or_addresses: **\*** relay's OR addresses, this is a tuple listing
|
||||
of the form (address (**str**), port (**int**), is_ipv6 (**bool**))
|
||||
:var str digest: **\*** router's upper-case hex digest
|
||||
|
||||
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
|
||||
:var int measured: bandwidth measured to be available by the relay, this is a
|
||||
unit-less heuristic generated by the Bandwidth authorities to weight relay
|
||||
selection
|
||||
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
|
||||
measurements
|
||||
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
|
||||
information that isn't yet recognized
|
||||
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy: router's exit policy
|
||||
|
||||
:var list microdescriptor_hashes: **\*** tuples of two values, the list of
|
||||
consensus methods for generating a set of digests and the 'algorithm =>
|
||||
digest' mappings
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
|
||||
'digest': (None, _parse_r_line),
|
||||
'or_addresses': ([], _parse_a_line),
|
||||
|
||||
'bandwidth': (None, _parse_w_line),
|
||||
'measured': (None, _parse_w_line),
|
||||
'is_unmeasured': (False, _parse_w_line),
|
||||
'unrecognized_bandwidth_entries': ([], _parse_w_line),
|
||||
|
||||
'exit_policy': (None, _parse_p_line),
|
||||
'microdescriptor_hashes': ([], _parse_m_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
|
||||
'a': _parse_a_line,
|
||||
'w': _parse_w_line,
|
||||
'p': _parse_p_line,
|
||||
'm': _parse_m_line,
|
||||
})
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
return 'Router status entries (v3)' if is_plural else 'Router status entry (v3)'
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r', 's')
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v', 'w', 'p')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryV3):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
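# Illustrative sketch (not part of the module), assuming a cached consensus at
# the given path: v3 status entries are usually obtained by iterating over a
# consensus with stem.descriptor.parse_file()...
#
#   >>> import stem.descriptor
#   >>> path = '/var/lib/tor/cached-consensus'  # hypothetical location
#   >>> for entry in stem.descriptor.parse_file(path, 'network-status-consensus-3 1.0'):
#   ...   print(entry.nickname, entry.address, entry.bandwidth)
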
class RouterStatusEntryMicroV3(RouterStatusEntry):
|
||||
"""
|
||||
Information about an individual router stored within a microdescriptor
|
||||
flavored network status document.
|
||||
|
||||
:var int bandwidth: bandwidth claimed by the relay (in kb/s)
|
||||
:var int measured: bandwidth measured to be available by the relay
|
||||
:var bool is_unmeasured: bandwidth measurement isn't based on three or more
|
||||
measurements
|
||||
:var list unrecognized_bandwidth_entries: **\*** bandwidth weighting
|
||||
information that isn't yet recognized
|
||||
|
||||
:var str digest: **\*** router's hex encoded digest of our corresponding microdescriptor
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(RouterStatusEntry.ATTRIBUTES, **{
|
||||
'bandwidth': (None, _parse_w_line),
|
||||
'measured': (None, _parse_w_line),
|
||||
'is_unmeasured': (False, _parse_w_line),
|
||||
'unrecognized_bandwidth_entries': ([], _parse_w_line),
|
||||
|
||||
'digest': (None, _parse_microdescriptor_m_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(RouterStatusEntry.PARSER_FOR_LINE, **{
|
||||
'w': _parse_w_line,
|
||||
'm': _parse_microdescriptor_m_line,
|
||||
})
|
||||
|
||||
def _name(self, is_plural = False):
|
||||
return 'Router status entries (micro v3)' if is_plural else 'Router status entry (micro v3)'
|
||||
|
||||
def _required_fields(self):
|
||||
return ('r', 's', 'm')
|
||||
|
||||
def _single_fields(self):
|
||||
return ('r', 's', 'v', 'w', 'm')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RouterStatusEntryMicroV3):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
|
@@ -0,0 +1,822 @@
|
|||
# Copyright 2012-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for Tor server descriptors, which contains the infrequently changing
|
||||
information about a Tor relay (contact information, exit policy, public keys,
|
||||
etc). This information is provided from a few sources...
|
||||
|
||||
* The control port via 'GETINFO desc/\*' queries.
|
||||
|
||||
* The 'cached-descriptors' file in Tor's data directory.
|
||||
|
||||
* Archived descriptors provided by CollecTor
|
||||
(https://collector.torproject.org/).
|
||||
|
||||
* Directory authorities and mirrors via their DirPort.
|
||||
|
||||
**Module Overview:**
|
||||
|
||||
::
|
||||
|
||||
ServerDescriptor - Tor server descriptor.
|
||||
|- RelayDescriptor - Server descriptor for a relay.
|
||||
|
|
||||
|- BridgeDescriptor - Scrubbed server descriptor for a bridge.
|
||||
| |- is_scrubbed - checks if our content has been properly scrubbed
|
||||
| +- get_scrubbing_issues - description of issues with our scrubbing
|
||||
|
|
||||
|- digest - calculates the upper-case hex digest value for our content
|
||||
|- get_annotations - dictionary of content prior to the descriptor entry
|
||||
+- get_annotation_lines - lines that provided the annotations
|
||||
"""
|
||||
|
||||
import functools
|
||||
import hashlib
|
||||
import re
|
||||
|
||||
import stem.descriptor.extrainfo_descriptor
|
||||
import stem.exit_policy
|
||||
import stem.prereq
|
||||
import stem.util.connection
|
||||
import stem.util.str_tools
|
||||
import stem.util.tor_tools
|
||||
import stem.version
|
||||
|
||||
from stem import str_type
|
||||
|
||||
from stem.descriptor import (
|
||||
PGP_BLOCK_END,
|
||||
Descriptor,
|
||||
_get_descriptor_components,
|
||||
_read_until_keywords,
|
||||
_bytes_for_block,
|
||||
_value,
|
||||
_values,
|
||||
_parse_simple_line,
|
||||
_parse_bytes_line,
|
||||
_parse_timestamp_line,
|
||||
_parse_forty_character_hex,
|
||||
_parse_key_block,
|
||||
)
|
||||
|
||||
try:
|
||||
# added in python 3.2
|
||||
from functools import lru_cache
|
||||
except ImportError:
|
||||
from stem.util.lru_cache import lru_cache
|
||||
|
||||
# relay descriptors must have exactly one of the following
|
||||
REQUIRED_FIELDS = (
|
||||
'router',
|
||||
'bandwidth',
|
||||
'published',
|
||||
'onion-key',
|
||||
'signing-key',
|
||||
'router-signature',
|
||||
)
|
||||
|
||||
# optional entries that can appear at most once
|
||||
SINGLE_FIELDS = (
|
||||
'platform',
|
||||
'fingerprint',
|
||||
'hibernating',
|
||||
'uptime',
|
||||
'contact',
|
||||
'read-history',
|
||||
'write-history',
|
||||
'eventdns',
|
||||
'family',
|
||||
'caches-extra-info',
|
||||
'extra-info-digest',
|
||||
'hidden-service-dir',
|
||||
'protocols',
|
||||
'allow-single-hop-exits',
|
||||
'ntor-onion-key',
|
||||
)
|
||||
|
||||
DEFAULT_IPV6_EXIT_POLICY = stem.exit_policy.MicroExitPolicy('reject 1-65535')
|
||||
REJECT_ALL_POLICY = stem.exit_policy.ExitPolicy('reject *:*')
|
||||
|
||||
|
||||
def _parse_file(descriptor_file, is_bridge = False, validate = False, **kwargs):
|
||||
"""
|
||||
Iterates over the server descriptors in a file.
|
||||
|
||||
:param file descriptor_file: file with descriptor content
|
||||
:param bool is_bridge: parses the file as being a bridge descriptor
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param dict kwargs: additional arguments for the descriptor constructor
|
||||
|
||||
:returns: iterator for ServerDescriptor instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is True
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
# Handler for relay descriptors
|
||||
#
|
||||
# Cached descriptors consist of annotations followed by the descriptor
|
||||
# itself. For instance...
|
||||
#
|
||||
# @downloaded-at 2012-03-14 16:31:05
|
||||
# @source "145.53.65.130"
|
||||
# router caerSidi 71.35.143.157 9001 0 0
|
||||
# platform Tor 0.2.1.30 on Linux x86_64
|
||||
# <rest of the descriptor content>
|
||||
# router-signature
|
||||
# -----BEGIN SIGNATURE-----
|
||||
# <signature for the above descriptor>
|
||||
# -----END SIGNATURE-----
|
||||
#
|
||||
# Metrics descriptor files are the same, but lack any annotations. The
|
||||
# code below simply does the following...
|
||||
#
|
||||
# - parse as annotations until we get to 'router'
|
||||
# - parse as descriptor content until we get to 'router-signature' followed
|
||||
# by the end of the signature block
|
||||
# - construct a descriptor and provide it back to the caller
|
||||
#
|
||||
# Any annotations after the last server descriptor are ignored (never provided
|
||||
# to the caller).
|
||||
|
||||
while True:
|
||||
annotations = _read_until_keywords('router', descriptor_file)
|
||||
|
||||
if not is_bridge:
|
||||
descriptor_content = _read_until_keywords('router-signature', descriptor_file)
|
||||
|
||||
# we've reached the 'router-signature', now include the pgp style block
|
||||
|
||||
block_end_prefix = PGP_BLOCK_END.split(' ', 1)[0]
|
||||
descriptor_content += _read_until_keywords(block_end_prefix, descriptor_file, True)
|
||||
else:
|
||||
descriptor_content = _read_until_keywords('router-digest', descriptor_file, True)
|
||||
|
||||
if descriptor_content:
|
||||
if descriptor_content[0].startswith(b'@type'):
|
||||
descriptor_content = descriptor_content[1:]
|
||||
|
||||
# strip newlines from annotations
|
||||
annotations = list(map(bytes.strip, annotations))
|
||||
|
||||
descriptor_text = bytes.join(b'', descriptor_content)
|
||||
|
||||
if is_bridge:
|
||||
yield BridgeDescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
yield RelayDescriptor(descriptor_text, validate, annotations, **kwargs)
|
||||
else:
|
||||
if validate and annotations:
|
||||
orphaned_annotations = stem.util.str_tools._to_unicode(b'\n'.join(annotations))
|
||||
raise ValueError("Content doesn't conform to being a server descriptor:\n%s" % orphaned_annotations)
|
||||
|
||||
break # done parsing descriptors
|
||||
|
||||
|
||||
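# Illustrative sketch (not part of the module), assuming a cached-descriptors
# file at the given path: callers normally reach this parser through
# stem.descriptor.parse_file() rather than invoking it directly...
#
#   >>> import stem.descriptor
#   >>> path = '/var/lib/tor/cached-descriptors'  # hypothetical location
#   >>> for desc in stem.descriptor.parse_file(path, 'server-descriptor 1.0'):
#   ...   print(desc.nickname, desc.address, desc.observed_bandwidth)
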
def _parse_router_line(descriptor, entries):
|
||||
# "router" nickname address ORPort SocksPort DirPort
|
||||
|
||||
value = _value('router', entries)
|
||||
router_comp = value.split()
|
||||
|
||||
if len(router_comp) < 5:
|
||||
raise ValueError('Router line must have five values: router %s' % value)
|
||||
elif not stem.util.tor_tools.is_valid_nickname(router_comp[0]):
|
||||
raise ValueError("Router line entry isn't a valid nickname: %s" % router_comp[0])
|
||||
elif not stem.util.connection.is_valid_ipv4_address(router_comp[1]):
|
||||
raise ValueError("Router line entry isn't a valid IPv4 address: %s" % router_comp[1])
|
||||
elif not stem.util.connection.is_valid_port(router_comp[2], allow_zero = True):
|
||||
raise ValueError("Router line's ORPort is invalid: %s" % router_comp[2])
|
||||
elif not stem.util.connection.is_valid_port(router_comp[3], allow_zero = True):
|
||||
raise ValueError("Router line's SocksPort is invalid: %s" % router_comp[3])
|
||||
elif not stem.util.connection.is_valid_port(router_comp[4], allow_zero = True):
|
||||
raise ValueError("Router line's DirPort is invalid: %s" % router_comp[4])
|
||||
|
||||
descriptor.nickname = router_comp[0]
|
||||
descriptor.address = router_comp[1]
|
||||
descriptor.or_port = int(router_comp[2])
|
||||
descriptor.socks_port = None if router_comp[3] == '0' else int(router_comp[3])
|
||||
descriptor.dir_port = None if router_comp[4] == '0' else int(router_comp[4])
|
||||
|
||||
|
||||
def _parse_bandwidth_line(descriptor, entries):
|
||||
# "bandwidth" bandwidth-avg bandwidth-burst bandwidth-observed
|
||||
|
||||
value = _value('bandwidth', entries)
|
||||
bandwidth_comp = value.split()
|
||||
|
||||
if len(bandwidth_comp) < 3:
|
||||
raise ValueError('Bandwidth line must have three values: bandwidth %s' % value)
|
||||
elif not bandwidth_comp[0].isdigit():
|
||||
raise ValueError("Bandwidth line's average rate isn't numeric: %s" % bandwidth_comp[0])
|
||||
elif not bandwidth_comp[1].isdigit():
|
||||
raise ValueError("Bandwidth line's burst rate isn't numeric: %s" % bandwidth_comp[1])
|
||||
elif not bandwidth_comp[2].isdigit():
|
||||
raise ValueError("Bandwidth line's observed rate isn't numeric: %s" % bandwidth_comp[2])
|
||||
|
||||
descriptor.average_bandwidth = int(bandwidth_comp[0])
|
||||
descriptor.burst_bandwidth = int(bandwidth_comp[1])
|
||||
descriptor.observed_bandwidth = int(bandwidth_comp[2])
|
||||
|
||||
|
||||
def _parse_platform_line(descriptor, entries):
|
||||
# "platform" string
|
||||
|
||||
_parse_bytes_line('platform', 'platform')(descriptor, entries)
|
||||
|
||||
# The platform attribute was set earlier. This line can contain any
|
||||
# arbitrary data, but tor seems to report its version followed by the
|
||||
# os like the following...
|
||||
#
|
||||
# platform Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64
|
||||
#
|
||||
# There's no guarantee that we'll be able to pick out the tor
|
||||
# version, but might as well try to save our caller the effort.
|
||||
|
||||
value = _value('platform', entries)
|
||||
platform_match = re.match('^(?:node-)?Tor (\S*).* on (.*)$', value)
|
||||
|
||||
if platform_match:
|
||||
version_str, descriptor.operating_system = platform_match.groups()
|
||||
|
||||
try:
|
||||
descriptor.tor_version = stem.version._get_version(version_str)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
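# Illustrative sketch (not part of the module): the platform regex above pulls
# the version and operating system out of a typical platform line.
#
#   >>> import re
#   >>> value = 'Tor 0.2.2.35 (git-73ff13ab3cc9570d) on Linux x86_64'
#   >>> re.match('^(?:node-)?Tor (\S*).* on (.*)$', value).groups()
#   ('0.2.2.35', 'Linux x86_64')
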
def _parse_fingerprint_line(descriptor, entries):
|
||||
# This is forty hex digits split into space separated groups of four.
|
||||
# Checking that we match this pattern.
|
||||
|
||||
value = _value('fingerprint', entries)
|
||||
fingerprint = value.replace(' ', '')
|
||||
|
||||
for grouping in value.split(' '):
|
||||
if len(grouping) != 4:
|
||||
raise ValueError('Fingerprint line should have groupings of four hex digits: %s' % value)
|
||||
|
||||
if not stem.util.tor_tools.is_valid_fingerprint(fingerprint):
|
||||
raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % value)
|
||||
|
||||
descriptor.fingerprint = fingerprint
|
||||
|
||||
|
||||
def _parse_hibernating_line(descriptor, entries):
|
||||
# "hibernating" 0|1 (in practice only set if one)
|
||||
|
||||
value = _value('hibernating', entries)
|
||||
|
||||
if value not in ('0', '1'):
|
||||
raise ValueError('Hibernating line had an invalid value, must be zero or one: %s' % value)
|
||||
|
||||
descriptor.hibernating = value == '1'
|
||||
|
||||
|
||||
def _parse_hidden_service_dir_line(descriptor, entries):
|
||||
value = _value('hidden-service-dir', entries)
|
||||
|
||||
if value:
|
||||
descriptor.hidden_service_dir = value.split(' ')
|
||||
else:
|
||||
descriptor.hidden_service_dir = ['2']
|
||||
|
||||
|
||||
def _parse_uptime_line(descriptor, entries):
|
||||
# We need to be tolerant of negative uptimes to accommodate a past tor
|
||||
# bug...
|
||||
#
|
||||
# Changes in version 0.1.2.7-alpha - 2007-02-06
|
||||
# - If our system clock jumps back in time, don't publish a negative
|
||||
# uptime in the descriptor. Also, don't let the global rate limiting
|
||||
# buckets go absurdly negative.
|
||||
#
|
||||
# After parsing all of the attributes we'll double check that negative
|
||||
# uptimes only occurred prior to this fix.
|
||||
|
||||
value = _value('uptime', entries)
|
||||
|
||||
try:
|
||||
descriptor.uptime = int(value)
|
||||
except ValueError:
|
||||
raise ValueError('Uptime line must have an integer value: %s' % value)
|
||||
|
||||
|
||||
def _parse_protocols_line(descriptor, entries):
|
||||
value = _value('protocols', entries)
|
||||
protocols_match = re.match('^Link (.*) Circuit (.*)$', value)
|
||||
|
||||
if not protocols_match:
|
||||
raise ValueError('Protocols line did not match the expected pattern: protocols %s' % value)
|
||||
|
||||
link_versions, circuit_versions = protocols_match.groups()
|
||||
descriptor.link_protocols = link_versions.split(' ')
|
||||
descriptor.circuit_protocols = circuit_versions.split(' ')
|
||||
|
||||
|
||||
def _parse_or_address_line(descriptor, entries):
|
||||
all_values = _values('or-address', entries)
|
||||
or_addresses = []
|
||||
|
||||
for entry in all_values:
|
||||
line = 'or-address %s' % entry
|
||||
|
||||
if ':' not in entry:
|
||||
raise ValueError('or-address line missing a colon: %s' % line)
|
||||
|
||||
address, port = entry.rsplit(':', 1)
|
||||
is_ipv6 = address.startswith('[') and address.endswith(']')
|
||||
|
||||
if is_ipv6:
|
||||
address = address[1:-1] # remove brackets
|
||||
|
||||
if not ((not is_ipv6 and stem.util.connection.is_valid_ipv4_address(address)) or
|
||||
(is_ipv6 and stem.util.connection.is_valid_ipv6_address(address))):
|
||||
raise ValueError('or-address line has a malformed address: %s' % line)
|
||||
|
||||
if not stem.util.connection.is_valid_port(port):
|
||||
raise ValueError('or-address line has a malformed port: %s' % line)
|
||||
|
||||
or_addresses.append((address, int(port), is_ipv6))
|
||||
|
||||
descriptor.or_addresses = or_addresses
|
||||
|
||||
|
||||
def _parse_history_line(keyword, history_end_attribute, history_interval_attribute, history_values_attribute, descriptor, entries):
|
||||
value = _value(keyword, entries)
|
||||
timestamp, interval, remainder = stem.descriptor.extrainfo_descriptor._parse_timestamp_and_interval(keyword, value)
|
||||
|
||||
try:
|
||||
if remainder:
|
||||
history_values = [int(entry) for entry in remainder.split(',')]
|
||||
else:
|
||||
history_values = []
|
||||
except ValueError:
|
||||
raise ValueError('%s line has non-numeric values: %s %s' % (keyword, keyword, value))
|
||||
|
||||
setattr(descriptor, history_end_attribute, timestamp)
|
||||
setattr(descriptor, history_interval_attribute, interval)
|
||||
setattr(descriptor, history_values_attribute, history_values)
|
||||
|
||||
|
||||
def _parse_exit_policy(descriptor, entries):
|
||||
if hasattr(descriptor, '_unparsed_exit_policy'):
|
||||
if descriptor._unparsed_exit_policy == [str_type('reject *:*')]:
|
||||
descriptor.exit_policy = REJECT_ALL_POLICY
|
||||
else:
|
||||
descriptor.exit_policy = stem.exit_policy.ExitPolicy(*descriptor._unparsed_exit_policy)
|
||||
|
||||
del descriptor._unparsed_exit_policy
|
||||
|
||||
|
||||
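# Illustrative sketch (not part of the module): the accept/reject lines
# gathered above become a stem.exit_policy.ExitPolicy, which can then answer
# questions such as whether the relay exits to a given address and port...
#
#   >>> import stem.exit_policy
#   >>> policy = stem.exit_policy.ExitPolicy('accept *:80', 'accept *:443', 'reject *:*')
#   >>> policy.can_exit_to('208.80.154.224', 443)
#   True
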
_parse_contact_line = _parse_bytes_line('contact', 'contact')
|
||||
_parse_published_line = _parse_timestamp_line('published', 'published')
|
||||
_parse_extrainfo_digest_line = _parse_forty_character_hex('extra-info-digest', 'extra_info_digest')
|
||||
_parse_read_history_line = functools.partial(_parse_history_line, 'read-history', 'read_history_end', 'read_history_interval', 'read_history_values')
|
||||
_parse_write_history_line = functools.partial(_parse_history_line, 'write-history', 'write_history_end', 'write_history_interval', 'write_history_values')
|
||||
_parse_ipv6_policy_line = lambda descriptor, entries: setattr(descriptor, 'exit_policy_v6', stem.exit_policy.MicroExitPolicy(_value('ipv6-policy', entries)))
|
||||
_parse_allow_single_hop_exits_line = lambda descriptor, entries: setattr(descriptor, 'allow_single_hop_exits', 'allow_single_hop_exits' in entries)
|
||||
_parse_caches_extra_info_line = lambda descriptor, entries: setattr(descriptor, 'extra_info_cache', 'extra_info_cache' in entries)
|
||||
_parse_family_line = lambda descriptor, entries: setattr(descriptor, 'family', set(_value('family', entries).split(' ')))
|
||||
_parse_eventdns_line = lambda descriptor, entries: setattr(descriptor, 'eventdns', _value('eventdns', entries) == '1')
|
||||
_parse_onion_key_line = _parse_key_block('onion-key', 'onion_key', 'RSA PUBLIC KEY')
|
||||
_parse_signing_key_line = _parse_key_block('signing-key', 'signing_key', 'RSA PUBLIC KEY')
|
||||
_parse_router_signature_line = _parse_key_block('router-signature', 'signature', 'SIGNATURE')
|
||||
_parse_ntor_onion_key_line = _parse_simple_line('ntor-onion-key', 'ntor_onion_key')
|
||||
_parse_router_digest_line = _parse_forty_character_hex('router-digest', '_digest')
|
||||
|
||||
|
||||
class ServerDescriptor(Descriptor):
|
||||
"""
|
||||
Common parent for server descriptors.
|
||||
|
||||
:var str nickname: **\*** relay's nickname
|
||||
:var str fingerprint: identity key fingerprint
|
||||
:var datetime published: **\*** time in UTC when this descriptor was made
|
||||
|
||||
:var str address: **\*** IPv4 address of the relay
|
||||
:var int or_port: **\*** port used for relaying
|
||||
:var int socks_port: **\*** port used as client (deprecated, always **None**)
|
||||
:var int dir_port: **\*** port used for descriptor mirroring
|
||||
|
||||
:var bytes platform: line with operating system and tor version
|
||||
:var stem.version.Version tor_version: version of tor
|
||||
:var str operating_system: operating system
|
||||
:var int uptime: uptime when published in seconds
|
||||
:var bytes contact: contact information
|
||||
:var stem.exit_policy.ExitPolicy exit_policy: **\*** stated exit policy
|
||||
:var stem.exit_policy.MicroExitPolicy exit_policy_v6: **\*** exit policy for IPv6
|
||||
:var set family: **\*** nicknames or fingerprints of declared family
|
||||
|
||||
:var int average_bandwidth: **\*** average rate it's willing to relay in bytes/s
|
||||
:var int burst_bandwidth: **\*** burst rate it's willing to relay in bytes/s
|
||||
:var int observed_bandwidth: **\*** estimated capacity based on usage in bytes/s
|
||||
|
||||
:var list link_protocols: link protocols supported by the relay
|
||||
:var list circuit_protocols: circuit protocols supported by the relay
|
||||
:var bool hibernating: **\*** hibernating when published
|
||||
:var bool allow_single_hop_exits: **\*** flag if single hop exiting is allowed
|
||||
:var bool extra_info_cache: **\*** flag if a mirror for extra-info documents
|
||||
:var str extra_info_digest: upper-case hex encoded digest of our extra-info document
|
||||
:var bool eventdns: flag for evdns backend (deprecated, always unset)
|
||||
:var list or_addresses: **\*** alternative for our address/or_port
|
||||
attributes, each entry is a tuple of the form (address (**str**), port
|
||||
(**int**), is_ipv6 (**bool**))
|
||||
|
||||
Deprecated, moved to extra-info descriptor...
|
||||
|
||||
:var datetime read_history_end: end of the sampling interval
|
||||
:var int read_history_interval: seconds per interval
|
||||
:var list read_history_values: bytes read during each interval
|
||||
|
||||
:var datetime write_history_end: end of the sampling interval
|
||||
:var int write_history_interval: seconds per interval
|
||||
:var list write_history_values: bytes written during each interval
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
ATTRIBUTES = {
|
||||
'nickname': (None, _parse_router_line),
|
||||
'fingerprint': (None, _parse_fingerprint_line),
|
||||
'contact': (None, _parse_contact_line),
|
||||
'published': (None, _parse_published_line),
|
||||
'exit_policy': (None, _parse_exit_policy),
|
||||
|
||||
'address': (None, _parse_router_line),
|
||||
'or_port': (None, _parse_router_line),
|
||||
'socks_port': (None, _parse_router_line),
|
||||
'dir_port': (None, _parse_router_line),
|
||||
|
||||
'platform': (None, _parse_platform_line),
|
||||
'tor_version': (None, _parse_platform_line),
|
||||
'operating_system': (None, _parse_platform_line),
|
||||
'uptime': (None, _parse_uptime_line),
|
||||
'exit_policy_v6': (DEFAULT_IPV6_EXIT_POLICY, _parse_ipv6_policy_line),
|
||||
'family': (set(), _parse_family_line),
|
||||
|
||||
'average_bandwidth': (None, _parse_bandwidth_line),
|
||||
'burst_bandwidth': (None, _parse_bandwidth_line),
|
||||
'observed_bandwidth': (None, _parse_bandwidth_line),
|
||||
|
||||
'link_protocols': (None, _parse_protocols_line),
|
||||
'circuit_protocols': (None, _parse_protocols_line),
|
||||
'hibernating': (False, _parse_hibernating_line),
|
||||
'allow_single_hop_exits': (False, _parse_allow_single_hop_exits_line),
|
||||
'extra_info_cache': (False, _parse_caches_extra_info_line),
|
||||
'extra_info_digest': (None, _parse_extrainfo_digest_line),
|
||||
'hidden_service_dir': (None, _parse_hidden_service_dir_line),
|
||||
'eventdns': (None, _parse_eventdns_line),
|
||||
'or_addresses': ([], _parse_or_address_line),
|
||||
|
||||
'read_history_end': (None, _parse_read_history_line),
|
||||
'read_history_interval': (None, _parse_read_history_line),
|
||||
'read_history_values': (None, _parse_read_history_line),
|
||||
|
||||
'write_history_end': (None, _parse_write_history_line),
|
||||
'write_history_interval': (None, _parse_write_history_line),
|
||||
'write_history_values': (None, _parse_write_history_line),
|
||||
}
|
||||
|
||||
PARSER_FOR_LINE = {
|
||||
'router': _parse_router_line,
|
||||
'bandwidth': _parse_bandwidth_line,
|
||||
'platform': _parse_platform_line,
|
||||
'published': _parse_published_line,
|
||||
'fingerprint': _parse_fingerprint_line,
|
||||
'contact': _parse_contact_line,
|
||||
'hibernating': _parse_hibernating_line,
|
||||
'extra-info-digest': _parse_extrainfo_digest_line,
|
||||
'hidden-service-dir': _parse_hidden_service_dir_line,
|
||||
'uptime': _parse_uptime_line,
|
||||
'protocols': _parse_protocols_line,
|
||||
'or-address': _parse_or_address_line,
|
||||
'read-history': _parse_read_history_line,
|
||||
'write-history': _parse_write_history_line,
|
||||
'ipv6-policy': _parse_ipv6_policy_line,
|
||||
'allow-single-hop-exits': _parse_allow_single_hop_exits_line,
|
||||
'caches-extra-info': _parse_caches_extra_info_line,
|
||||
'family': _parse_family_line,
|
||||
'eventdns': _parse_eventdns_line,
|
||||
}
|
||||
|
||||
def __init__(self, raw_contents, validate = False, annotations = None):
|
||||
"""
|
||||
Server descriptor constructor, created from an individual relay's
|
||||
descriptor content (as provided by 'GETINFO desc/*', cached descriptors,
|
||||
and metrics).
|
||||
|
||||
By default this validates the descriptor's content as it's parsed. This
|
||||
validation can be disabled to either improve performance or to accept
|
||||
malformed data.
|
||||
|
||||
:param str raw_contents: descriptor content provided by the relay
|
||||
:param bool validate: checks the validity of the descriptor's content if
|
||||
**True**, skips these checks otherwise
|
||||
:param list annotations: lines that appeared prior to the descriptor
|
||||
|
||||
:raises: **ValueError** if the contents is malformed and validate is True
|
||||
"""
|
||||
|
||||
super(ServerDescriptor, self).__init__(raw_contents, lazy_load = not validate)
|
||||
self._annotation_lines = annotations if annotations else []
|
||||
|
||||
# A descriptor contains a series of 'keyword lines' which are simply a
|
||||
# keyword followed by an optional value. Lines can also be followed by a
|
||||
# signature block.
|
||||
#
|
||||
# We care about the ordering of 'accept' and 'reject' entries because this
|
||||
# influences the resulting exit policy, but for everything else the order
|
||||
# does not matter, so we break the rest into key / value pairs.
|
||||
|
||||
entries, self._unparsed_exit_policy = _get_descriptor_components(stem.util.str_tools._to_unicode(raw_contents), validate, ('accept', 'reject'))
|
||||
|
||||
if validate:
|
||||
self._parse(entries, validate)
|
||||
|
||||
_parse_exit_policy(self, entries)
|
||||
|
||||
# if we have a negative uptime and a tor version that shouldn't exhibit
|
||||
# this bug then fail validation
|
||||
|
||||
if validate and self.uptime and self.tor_version:
|
||||
if self.uptime < 0 and self.tor_version >= stem.version.Version('0.1.2.7'):
|
||||
raise ValueError("Descriptor for version '%s' had a negative uptime value: %i" % (self.tor_version, self.uptime))
|
||||
|
||||
self._check_constraints(entries)
|
||||
else:
|
||||
self._entries = entries
|
||||
|
||||
def digest(self):
|
||||
"""
|
||||
Provides the hex encoded sha1 of our content. This value is part of the
|
||||
network status entry for this relay.
|
||||
|
||||
:returns: **unicode** with the upper-case hex digest value for this server descriptor
|
||||
"""
|
||||
|
||||
raise NotImplementedError('Unsupported Operation: this should be implemented by the ServerDescriptor subclass')
|
||||
|
||||
@lru_cache()
|
||||
def get_annotations(self):
|
||||
"""
|
||||
Provides content that appeared prior to the descriptor. If this comes from
|
||||
the cached-descriptors file then this commonly contains content like...
|
||||
|
||||
::
|
||||
|
||||
@downloaded-at 2012-03-18 21:18:29
|
||||
@source "173.254.216.66"
|
||||
|
||||
:returns: **dict** with the key/value pairs in our annotations
|
||||
"""
|
||||
|
||||
annotation_dict = {}
|
||||
|
||||
for line in self._annotation_lines:
|
||||
if b' ' in line:
|
||||
key, value = line.split(b' ', 1)
|
||||
annotation_dict[key] = value
|
||||
else:
|
||||
annotation_dict[line] = None
|
||||
|
||||
return annotation_dict
|
||||
|
||||
def get_annotation_lines(self):
|
||||
"""
|
||||
Provides the lines of content that appeared prior to the descriptor. This
|
||||
is the same as the
|
||||
:func:`~stem.descriptor.server_descriptor.ServerDescriptor.get_annotations`
|
||||
results, but with the unparsed lines and ordering retained.
|
||||
|
||||
:returns: **list** with the lines of annotation that came before this descriptor
|
||||
"""
|
||||
|
||||
return self._annotation_lines
|
||||
|
||||
def _check_constraints(self, entries):
|
||||
"""
|
||||
Does a basic check that the entries conform to this descriptor type's
|
||||
constraints.
|
||||
|
||||
:param dict entries: keyword => (value, pgp key) entries
|
||||
|
||||
:raises: **ValueError** if an issue arises in validation
|
||||
"""
|
||||
|
||||
for keyword in self._required_fields():
|
||||
if keyword not in entries:
|
||||
raise ValueError("Descriptor must have a '%s' entry" % keyword)
|
||||
|
||||
for keyword in self._single_fields():
|
||||
if keyword in entries and len(entries[keyword]) > 1:
|
||||
raise ValueError("The '%s' entry can only appear once in a descriptor" % keyword)
|
||||
|
||||
expected_first_keyword = self._first_keyword()
|
||||
if expected_first_keyword and expected_first_keyword != list(entries.keys())[0]:
|
||||
raise ValueError("Descriptor must start with a '%s' entry" % expected_first_keyword)
|
||||
|
||||
expected_last_keyword = self._last_keyword()
|
||||
if expected_last_keyword and expected_last_keyword != list(entries.keys())[-1]:
|
||||
raise ValueError("Descriptor must end with a '%s' entry" % expected_last_keyword)
|
||||
|
||||
if not self.exit_policy:
|
||||
raise ValueError("Descriptor must have at least one 'accept' or 'reject' entry")
|
||||
|
||||
# Constraints that the descriptor must meet to be valid. These can be None if
|
||||
# not applicable.
|
||||
|
||||
def _required_fields(self):
|
||||
return REQUIRED_FIELDS
|
||||
|
||||
def _single_fields(self):
|
||||
return REQUIRED_FIELDS + SINGLE_FIELDS
|
||||
|
||||
def _first_keyword(self):
|
||||
return 'router'
|
||||
|
||||
def _last_keyword(self):
|
||||
return 'router-signature'
|
||||
|
||||
|
||||
class RelayDescriptor(ServerDescriptor):
|
||||
"""
|
||||
Server descriptor (`descriptor specification
|
||||
<https://gitweb.torproject.org/torspec.git/tree/dir-spec.txt>`_)
|
||||
|
||||
:var str onion_key: **\*** key used to encrypt EXTEND cells
|
||||
:var str ntor_onion_key: base64 key used to encrypt EXTEND in the ntor protocol
|
||||
:var str signing_key: **\*** relay's long-term identity key
|
||||
:var str signature: **\*** signature for this descriptor
|
||||
|
||||
**\*** attribute is required when we're parsed with validation
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(ServerDescriptor.ATTRIBUTES, **{
|
||||
'onion_key': (None, _parse_onion_key_line),
|
||||
'ntor_onion_key': (None, _parse_ntor_onion_key_line),
|
||||
'signing_key': (None, _parse_signing_key_line),
|
||||
'signature': (None, _parse_router_signature_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(ServerDescriptor.PARSER_FOR_LINE, **{
|
||||
'onion-key': _parse_onion_key_line,
|
||||
'ntor-onion-key': _parse_ntor_onion_key_line,
|
||||
'signing-key': _parse_signing_key_line,
|
||||
'router-signature': _parse_router_signature_line,
|
||||
})
|
||||
|
||||
def __init__(self, raw_contents, validate = False, annotations = None):
|
||||
super(RelayDescriptor, self).__init__(raw_contents, validate, annotations)
|
||||
|
||||
if validate:
|
||||
if self.fingerprint:
|
||||
key_hash = hashlib.sha1(_bytes_for_block(self.signing_key)).hexdigest()
|
||||
|
||||
if key_hash != self.fingerprint.lower():
|
||||
raise ValueError('Fingerprint does not match the hash of our signing key (fingerprint: %s, signing key hash: %s)' % (self.fingerprint.lower(), key_hash))
|
||||
|
||||
if stem.prereq.is_crypto_available():
|
||||
signed_digest = self._digest_for_signature(self.signing_key, self.signature)
|
||||
|
||||
if signed_digest != self.digest():
|
||||
raise ValueError('Decrypted digest does not match local digest (calculated: %s, local: %s)' % (signed_digest, self.digest()))
|
||||
|
||||
@lru_cache()
|
||||
def digest(self):
|
||||
"""
|
||||
Provides the digest of our descriptor's content.
|
||||
|
||||
:returns: the digest string encoded in uppercase hex
|
||||
|
||||
:raises: **ValueError** if the digest cannot be calculated
|
||||
"""
|
||||
|
||||
return self._digest_for_content(b'router ', b'\nrouter-signature\n')
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, RelayDescriptor):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
|
||||
|
||||
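# Illustrative sketch (not part of the module) of the fingerprint check done in
# RelayDescriptor.__init__ above: a relay's fingerprint is the SHA1 digest of
# its signing key block (_bytes_for_block is the module helper imported above).
#
#   >>> import hashlib
#   >>> key_hash = hashlib.sha1(_bytes_for_block(desc.signing_key)).hexdigest()  # desc is a parsed RelayDescriptor
#   >>> key_hash == desc.fingerprint.lower()
#   True
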
class BridgeDescriptor(ServerDescriptor):
|
||||
"""
|
||||
Bridge descriptor (`bridge descriptor specification
|
||||
<https://collector.torproject.org/formats.html#bridge-descriptors>`_)
|
||||
"""
|
||||
|
||||
ATTRIBUTES = dict(ServerDescriptor.ATTRIBUTES, **{
|
||||
'_digest': (None, _parse_router_digest_line),
|
||||
})
|
||||
|
||||
PARSER_FOR_LINE = dict(ServerDescriptor.PARSER_FOR_LINE, **{
|
||||
'router-digest': _parse_router_digest_line,
|
||||
})
|
||||
|
||||
def digest(self):
|
||||
return self._digest
|
||||
|
||||
def is_scrubbed(self):
|
||||
"""
|
||||
Checks if we've been properly scrubbed in accordance with the `bridge
|
||||
descriptor specification
|
||||
<https://collector.torproject.org/formats.html#bridge-descriptors>`_.
|
||||
Validation is a moving target so this may not be fully up to date.
|
||||
|
||||
:returns: **True** if we're scrubbed, **False** otherwise
|
||||
"""
|
||||
|
||||
return self.get_scrubbing_issues() == []
|
||||
|
||||
@lru_cache()
|
||||
def get_scrubbing_issues(self):
|
||||
"""
|
||||
Provides issues with our scrubbing.
|
||||
|
||||
:returns: **list** of strings which describe issues we have with our
|
||||
scrubbing, this list is empty if we're properly scrubbed
|
||||
"""
|
||||
|
||||
issues = []
|
||||
|
||||
if not self.address.startswith('10.'):
|
||||
issues.append("Router line's address should be scrubbed to be '10.x.x.x': %s" % self.address)
|
||||
|
||||
if self.contact and self.contact != 'somebody':
|
||||
issues.append("Contact line should be scrubbed to be 'somebody', but instead had '%s'" % self.contact)
|
||||
|
||||
for address, _, is_ipv6 in self.or_addresses:
|
||||
if not is_ipv6 and not address.startswith('10.'):
|
||||
issues.append("or-address line's address should be scrubbed to be '10.x.x.x': %s" % address)
|
||||
elif is_ipv6 and not address.startswith('fd9f:2e19:3bcf::'):
|
||||
# TODO: this check isn't quite right because we aren't checking that
|
||||
# the next grouping of hex digits contains 1-2 digits
|
||||
issues.append("or-address line's address should be scrubbed to be 'fd9f:2e19:3bcf::xx:xxxx': %s" % address)
|
||||
|
||||
for line in self.get_unrecognized_lines():
|
||||
if line.startswith('onion-key '):
|
||||
issues.append('Bridge descriptors should have their onion-key scrubbed: %s' % line)
|
||||
elif line.startswith('signing-key '):
|
||||
issues.append('Bridge descriptors should have their signing-key scrubbed: %s' % line)
|
||||
elif line.startswith('router-signature '):
|
||||
issues.append('Bridge descriptors should have their signature scrubbed: %s' % line)
|
||||
|
||||
return issues
|
||||
|
||||
def _required_fields(self):
|
||||
# bridge required fields are the same as a relay descriptor, minus items
|
||||
# excluded according to the format page
|
||||
|
||||
excluded_fields = [
|
||||
'onion-key',
|
||||
'signing-key',
|
||||
'router-signature',
|
||||
]
|
||||
|
||||
included_fields = [
|
||||
'router-digest',
|
||||
]
|
||||
|
||||
return tuple(included_fields + [f for f in REQUIRED_FIELDS if f not in excluded_fields])
|
||||
|
||||
def _single_fields(self):
|
||||
return self._required_fields() + SINGLE_FIELDS
|
||||
|
||||
def _last_keyword(self):
|
||||
return None
|
||||
|
||||
def _compare(self, other, method):
|
||||
if not isinstance(other, BridgeDescriptor):
|
||||
return False
|
||||
|
||||
return method(str(self).strip(), str(other).strip())
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self).strip())
|
||||
|
||||
def __eq__(self, other):
|
||||
return self._compare(other, lambda s, o: s == o)
|
||||
|
||||
def __lt__(self, other):
|
||||
return self._compare(other, lambda s, o: s < o)
|
||||
|
||||
def __le__(self, other):
|
||||
return self._compare(other, lambda s, o: s <= o)
|
||||
117
Shared/lib/python3.4/site-packages/stem/descriptor/tordnsel.py
Normal file
|
|
@@ -0,0 +1,117 @@
|
|||
# Copyright 2013-2015, Damian Johnson and The Tor Project
|
||||
# See LICENSE for licensing information
|
||||
|
||||
"""
|
||||
Parsing for `TorDNSEL <https://www.torproject.org/projects/tordnsel.html.en>`_
|
||||
exit list files.
|
||||
|
||||
::
|
||||
|
||||
TorDNSEL - Exit list provided by TorDNSEL
|
||||
"""
|
||||
|
||||
import stem.util.connection
|
||||
import stem.util.str_tools
|
||||
import stem.util.tor_tools
|
||||
|
||||
from stem.descriptor import (
|
||||
Descriptor,
|
||||
_read_until_keywords,
|
||||
_get_descriptor_components,
|
||||
)
|
||||
|
||||
|
||||
def _parse_file(tordnsel_file, validate = False, **kwargs):
|
||||
"""
|
||||
Iterates over a tordnsel file.
|
||||
|
||||
:returns: iterator for :class:`~stem.descriptor.tordnsel.TorDNSEL`
|
||||
instances in the file
|
||||
|
||||
:raises:
|
||||
* **ValueError** if the contents is malformed and validate is **True**
|
||||
* **IOError** if the file can't be read
|
||||
"""
|
||||
|
||||
# skip content prior to the first ExitNode
|
||||
_read_until_keywords('ExitNode', tordnsel_file, skip = True)
|
||||
|
||||
while True:
|
||||
contents = _read_until_keywords('ExitAddress', tordnsel_file)
|
||||
contents += _read_until_keywords('ExitNode', tordnsel_file)
|
||||
|
||||
if contents:
|
||||
yield TorDNSEL(bytes.join(b'', contents), validate, **kwargs)
|
||||
else:
|
||||
break # done parsing file
|
||||
|
||||
|
||||
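# Illustrative sketch (not part of the module), assuming an exit list file at
# the given path: exit lists are usually read through
# stem.descriptor.parse_file()...
#
#   >>> import stem.descriptor
#   >>> path = '/path/to/exit-list'  # hypothetical location
#   >>> for entry in stem.descriptor.parse_file(path, 'tordnsel 1.0'):
#   ...   print(entry.fingerprint, entry.exit_addresses)
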
class TorDNSEL(Descriptor):
|
||||
"""
|
||||
TorDNSEL descriptor (`exitlist specification
|
||||
<https://www.torproject.org/tordnsel/exitlist-spec.txt>`_)
|
||||
|
||||
:var str fingerprint: **\*** authority's fingerprint
|
||||
:var datetime published: **\*** time in UTC when this descriptor was made
|
||||
:var datetime last_status: **\*** time in UTC when the relay was seen in a v2 network status
|
||||
:var list exit_addresses: **\*** list of (str address, datetime date) tuples consisting of the found IPv4 exit address and the time
|
||||
|
||||
**\*** attribute is either required when we're parsed with validation or has
|
||||
a default value, others are left as **None** if undefined
|
||||
"""
|
||||
|
||||
def __init__(self, raw_contents, validate):
|
||||
super(TorDNSEL, self).__init__(raw_contents)
|
||||
raw_contents = stem.util.str_tools._to_unicode(raw_contents)
|
||||
entries = _get_descriptor_components(raw_contents, validate)
|
||||
|
||||
self.fingerprint = None
|
||||
self.published = None
|
||||
self.last_status = None
|
||||
self.exit_addresses = []
|
||||
|
||||
self._parse(entries, validate)
|
||||
|
||||
def _parse(self, entries, validate):
|
||||
|
||||
for keyword, values in list(entries.items()):
|
||||
value, block_type, block_content = values[0]
|
||||
|
||||
if validate and block_content:
|
||||
raise ValueError('Unexpected block content: %s' % block_content)
|
||||
|
||||
if keyword == 'ExitNode':
|
||||
if validate and not stem.util.tor_tools.is_valid_fingerprint(value):
|
||||
raise ValueError('Tor relay fingerprints consist of forty hex digits: %s' % value)
|
||||
|
||||
self.fingerprint = value
|
||||
elif keyword == 'Published':
|
||||
try:
|
||||
self.published = stem.util.str_tools._parse_timestamp(value)
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("Published time wasn't parsable: %s" % value)
|
||||
elif keyword == 'LastStatus':
|
||||
try:
|
||||
self.last_status = stem.util.str_tools._parse_timestamp(value)
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("LastStatus time wasn't parsable: %s" % value)
|
||||
elif keyword == 'ExitAddress':
|
||||
for value, block_type, block_content in values:
|
||||
address, date = value.split(' ', 1)
|
||||
|
||||
if validate:
|
||||
if not stem.util.connection.is_valid_ipv4_address(address):
|
||||
raise ValueError("ExitAddress isn't a valid IPv4 address: %s" % address)
|
||||
elif block_content:
|
||||
raise ValueError('Unexpected block content: %s' % block_content)
|
||||
|
||||
try:
|
||||
date = stem.util.str_tools._parse_timestamp(date)
|
||||
self.exit_addresses.append((address, date))
|
||||
except ValueError:
|
||||
if validate:
|
||||
raise ValueError("ExitAddress found time wasn't parsable: %s" % value)
|
||||
elif validate:
|
||||
raise ValueError('Unrecognized keyword: %s' % keyword)
|
||||