add tor deps

j 2015-11-23 22:13:53 +01:00
commit 1f23120cc3
91 changed files with 25537 additions and 535 deletions

stem/util/__init__.py

@@ -0,0 +1,20 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utility functions used by the stem library.
"""
__all__ = [
'conf',
'connection',
'enum',
'log',
'lru_cache',
'ordereddict',
'proc',
'system',
'term',
'test_tools',
'tor_tools',
]

stem/util/conf.py

@@ -0,0 +1,745 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Handlers for text configuration files. Configurations are simple string to
string mappings, with the configuration files using the following rules...
* the key/value is separated by a space
* anything after a '#' is ignored as a comment
* excess whitespace is trimmed
* empty lines are ignored
* multi-line values can be defined by following the key with lines starting
with a '|'
For instance...
::
# This is my sample config
user.name Galen
user.password yabba1234 # here's an inline comment
user.notes takes a fancy to pepperjack cheese
blankEntry.example
msg.greeting
|Multi-line message exclaiming of the
|wonder and awe that is pepperjack!
... would be loaded as...
::
config = {
'user.name': 'Galen',
'user.password': 'yabba1234',
'user.notes': 'takes a fancy to pepperjack cheese',
'blankEntry.example': '',
'msg.greeting': 'Multi-line message exclaiming of the\\nwonder and awe that is pepperjack!',
}
Configurations are managed via the :class:`~stem.util.conf.Config` class. The
:class:`~stem.util.conf.Config` can be used directly with its
:func:`~stem.util.conf.Config.get` and :func:`~stem.util.conf.Config.set`
methods, but usually modules will want a local dictionary with just the
configurations they care about.
To do this use the :func:`~stem.util.conf.config_dict` function. For example...
::
import getpass
from stem.util import conf, connection
def config_validator(key, value):
if key == 'timeout':
# require at least a one second timeout
return max(1, value)
elif key == 'endpoint':
if not connection.is_valid_ipv4_address(value):
raise ValueError("'%s' isn't a valid IPv4 address" % value)
elif key == 'port':
if not connection.is_valid_port(value):
raise ValueError("'%s' isn't a valid port" % value)
elif key == 'retries':
# negative retries really don't make sense
return max(0, value)
CONFIG = conf.config_dict('ssh_login', {
'username': getpass.getuser(),
'password': '',
'timeout': 10,
'endpoint': '263.12.8.0',
'port': 22,
'reconnect': False,
'retries': 3,
}, config_validator)
There are several things going on here, so let's take it step by step...
* The :func:`~stem.util.conf.config_dict` provides a dictionary that's bound
to a given configuration. If the "ssh_login" configuration changes
then so will the contents of CONFIG.
* The dictionary we're passing to :func:`~stem.util.conf.config_dict` provides
two important pieces of information: default values and their types. See the
Config's :func:`~stem.util.conf.Config.get` method for how these type
inferences work.
* The config_validator is a hook we're adding to make sure CONFIG only gets
values we think are valid. In this case it ensures that our timeout value
is at least one second, and rejects endpoints or ports that are invalid.
Now let's say our user has the following configuration file...
::
username waddle_doo
password jabberwocky
timeout -15
port 9000000
retries lots
reconnect true
logging debug
... and we load it as follows...
::
>>> from stem.util import conf
>>> our_config = conf.get_config('ssh_login')
>>> our_config.load('/home/atagar/user_config')
>>> print CONFIG # doctest: +SKIP
{
"username": "waddle_doo",
"password": "jabberwocky",
"timeout": 1,
"endpoint": "263.12.8.0",
"port": 22,
"reconnect": True,
"retries": 3,
}
Here's an explanation of what happened...
* the username, password, and reconnect attributes took the values in the
configuration file
* the 'config_validator' we added earlier allows for a minimum timeout of one
and rejected the invalid port (with a log message)
* we weren't able to convert the "lots" value for retries to an integer, so
it kept its default value and logged a warning
* the user didn't supply an endpoint so that remained unchanged
* our CONFIG didn't have a 'logging' attribute so it was ignored
**Module Overview:**
::
config_dict - provides a dictionary that's kept in sync with our config
get_config - singleton for getting configurations
uses_settings - provides an annotation for functions that use configurations
parse_enum_csv - helper function for parsing configuration entries for enums
Config - Custom configuration
|- load - reads a configuration file
|- save - writes the current configuration to a file
|- clear - empties our loaded configuration contents
|- add_listener - notifies the given listener when an update occurs
|- clear_listeners - removes any attached listeners
|- keys - provides keys in the loaded configuration
|- set - sets the given key/value pair
|- unused_keys - provides keys that have never been requested
|- get - provides the value for a given key, with type inference
+- get_value - provides the value for a given key as a string
"""
import inspect
import os
import threading
from stem.util import log
try:
# added in python 2.7
from collections import OrderedDict
except ImportError:
from stem.util.ordereddict import OrderedDict
CONFS = {} # mapping of identifier to singleton instances of configs
class _SyncListener(object):
def __init__(self, config_dict, interceptor):
self.config_dict = config_dict
self.interceptor = interceptor
def update(self, config, key):
if key in self.config_dict:
new_value = config.get(key, self.config_dict[key])
if new_value == self.config_dict[key]:
return # no change
if self.interceptor:
interceptor_value = self.interceptor(key, new_value)
if interceptor_value:
new_value = interceptor_value
self.config_dict[key] = new_value
def config_dict(handle, conf_mappings, handler = None):
"""
Makes a dictionary that stays synchronized with a configuration.
This takes a dictionary of 'config_key => default_value' mappings and
changes the values to reflect our current configuration. This will leave
the previous values alone if...
* we don't have a value for that config_key
* we can't convert our value to be the same type as the default_value
If a handler is provided then this is called just prior to assigning new
values to the config_dict. The handler function is expected to accept the
(key, value) for the new values and return what we should actually insert
into the dictionary. If this returns None then the value is updated as
normal.
For more information about how we convert types see our
:func:`~stem.util.conf.Config.get` method.
**The dictionary you get from this is managed by the
:class:`~stem.util.conf.Config` class and should be treated as being
read-only.**
:param str handle: unique identifier for a config instance
:param dict conf_mappings: config key/value mappings used as our defaults
:param functor handler: function referred to prior to assigning values
"""
selected_config = get_config(handle)
selected_config.add_listener(_SyncListener(conf_mappings, handler).update)
return conf_mappings
def get_config(handle):
"""
Singleton constructor for configuration file instances. If a configuration
already exists for the handle then it's returned. Otherwise a fresh instance
is constructed.
:param str handle: unique identifier used to access this config instance
"""
if handle not in CONFS:
CONFS[handle] = Config()
return CONFS[handle]
def uses_settings(handle, path, lazy_load = True):
"""
Provides a function that can be used as a decorator for other functions that
require settings to be loaded. Functions with this decorator will be provided
with the configuration as its 'config' keyword argument.
.. versionchanged:: 1.3.0
Omits the 'config' argument if the function we're decorating doesn't accept
it.
::
uses_settings = stem.util.conf.uses_settings('my_app', '/path/to/settings.cfg')
@uses_settings
def my_function(config):
print 'hello %s!' % config.get('username', '')
:param str handle: handle for the configuration
:param str path: path where the configuration should be loaded from
:param bool lazy_load: loads the configuration file when the decorator is
used if true, otherwise it's loaded right away
:returns: **function** that can be used as a decorator to provide the
configuration
:raises: **IOError** if we fail to read the configuration file, if
**lazy_load** is true then this arises when we use the decorator
"""
config = get_config(handle)
if not lazy_load and not config.get('settings_loaded', False):
config.load(path)
config.set('settings_loaded', 'true')
def decorator(func):
def wrapped(*args, **kwargs):
if lazy_load and not config.get('settings_loaded', False):
config.load(path)
config.set('settings_loaded', 'true')
if 'config' in inspect.getargspec(func).args:
return func(*args, config = config, **kwargs)
else:
return func(*args, **kwargs)
return wrapped
return decorator
def parse_enum(key, value, enumeration):
"""
Provides the enumeration value for a given key. This is a case insensitive
lookup and raises an exception if the enum key doesn't exist.
:param str key: configuration key being looked up
:param str value: value to be parsed
:param stem.util.enum.Enum enumeration: enumeration the values should be in
:returns: enumeration value
:raises: **ValueError** if the **value** isn't among the enumeration keys
"""
return parse_enum_csv(key, value, enumeration, 1)[0]
def parse_enum_csv(key, value, enumeration, count = None):
"""
Parses a given value as being a comma separated listing of enumeration keys,
returning the corresponding enumeration values. This is intended to be a
helper for config handlers. The checks this does are case insensitive.
The **count** attribute can be used to make assertions based on the number of
values. This can be...
* None to indicate that there's no restrictions.
* An int to indicate that we should have this many values.
* An (int, int) tuple to indicate the range that values can be in. This range
is inclusive and either can be None to indicate the lack of a lower or
upper bound.
:param str key: configuration key being looked up
:param str value: value to be parsed
:param stem.util.enum.Enum enumeration: enumeration the values should be in
:param int,tuple count: validates that we have this many items
:returns: list with the enumeration values
:raises: **ValueError** if the count assertion fails or the **value** entries
don't match the enumeration keys
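For example, with a hypothetical Runlevel enumeration (illustrative, not part of this module)...
::
>>> from stem.util import conf, enum
>>> Runlevel = enum.UppercaseEnum('DEBUG', 'INFO', 'NOTICE')
>>> conf.parse_enum_csv('runlevels', 'debug, info', Runlevel)
['DEBUG', 'INFO']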
"""
values = [val.upper().strip() for val in value.split(',')]
if values == ['']:
return []
if count is None:
pass # no count validation checks to do
elif isinstance(count, int):
if len(values) != count:
raise ValueError("Config entry '%s' is expected to be %i comma separated values, got '%s'" % (key, count, value))
elif isinstance(count, tuple) and len(count) == 2:
minimum, maximum = count
if minimum is not None and len(values) < minimum:
raise ValueError("Config entry '%s' must have at least %i comma separated values, got '%s'" % (key, minimum, value))
if maximum is not None and len(values) > maximum:
raise ValueError("Config entry '%s' can have at most %i comma separated values, got '%s'" % (key, maximum, value))
else:
raise ValueError("The count must be None, an int, or two value tuple. Got '%s' (%s)'" % (count, type(count)))
result = []
enum_keys = [k.upper() for k in list(enumeration.keys())]
enum_values = list(enumeration)
for val in values:
if val in enum_keys:
result.append(enum_values[enum_keys.index(val)])
else:
raise ValueError("The '%s' entry of config entry '%s' wasn't in the enumeration (expected %s)" % (val, key, ', '.join(enum_keys)))
return result
class Config(object):
"""
Handler for easily working with custom configurations, providing persistence
to and from files. All operations are thread safe.
**Example usage:**
User has a file at '/home/atagar/myConfig' with...
::
destination.ip 1.2.3.4
destination.port blarg
startup.run export PATH=$PATH:~/bin
startup.run alias l=ls
And they have a script with...
::
from stem.util import conf
# Configuration values we'll use in this file. These are mappings of
# configuration keys to the default values we'll use if the user doesn't
# have something different in their config file (or it doesn't match this
# type).
ssh_config = conf.config_dict('ssh_login', {
'login.user': 'atagar',
'login.password': 'pepperjack_is_awesome!',
'destination.ip': '127.0.0.1',
'destination.port': 22,
'startup.run': [],
})
# Makes an empty config instance with the handle of 'ssh_login'. This is
# a singleton so other classes can fetch this same configuration from
# this handle.
user_config = conf.get_config('ssh_login')
# Loads the user's configuration file, warning if this fails.
try:
user_config.load("/home/atagar/myConfig")
except IOError as exc:
print "Unable to load the user's config: %s" % exc
# This replaces the contents of ssh_config with the values from the user's
# config file if...
#
# * the key is present in the config file
# * we're able to convert the configuration file's value to the same type
# as what's in the mapping (see the Config.get() method for how these
# type inferences work)
#
# For instance in this case...
#
# * the login values are left alone because they aren't in the user's
# config file
#
# * the 'destination.port' is also left with the value of 22 because we
# can't turn "blarg" into an integer
#
# The other values are replaced, so ssh_config now becomes...
#
# {'login.user': 'atagar',
# 'login.password': 'pepperjack_is_awesome!',
# 'destination.ip': '1.2.3.4',
# 'destination.port': 22,
# 'startup.run': ['export PATH=$PATH:~/bin', 'alias l=ls']}
#
# Information for what values fail to load and why are reported to
# 'stem.util.log'.
"""
def __init__(self):
self._path = None # location we last loaded from or saved to
self._contents = {} # configuration key/value pairs
self._listeners = [] # functors to be notified of config changes
# used for accessing _contents
self._contents_lock = threading.RLock()
# keys that have been requested (used to provide unused config contents)
self._requested_keys = set()
def load(self, path = None):
"""
Reads in the contents of the given path, adding its configuration values
to our current contents. If the path is a directory then this loads each
of the files, recursively.
.. versionchanged:: 1.3.0
Added support for directories.
:param str path: file or directory path to be loaded, this uses the last
loaded path if not provided
:raises:
* **IOError** if we fail to read the file (it doesn't exist, insufficient
permissions, etc)
* **ValueError** if no path was provided and we've never been provided one
"""
if path:
self._path = path
elif not self._path:
raise ValueError('Unable to load configuration: no path provided')
if os.path.isdir(self._path):
for root, dirnames, filenames in os.walk(self._path):
for filename in filenames:
self.load(os.path.join(root, filename))
return
with open(self._path, 'r') as config_file:
read_contents = config_file.readlines()
with self._contents_lock:
while read_contents:
line = read_contents.pop(0)
# strips any commenting or excess whitespace
comment_start = line.find('#')
if comment_start != -1:
line = line[:comment_start]
line = line.strip()
# parse the key/value pair
if line:
try:
key, value = line.split(' ', 1)
value = value.strip()
except ValueError:
log.debug("Config entry '%s' is expected to be of the format 'Key Value', defaulting to '%s' -> ''" % (line, line))
key, value = line, ''
if not value:
# this might be a multi-line entry, try processing it as such
multiline_buffer = []
while read_contents and read_contents[0].lstrip().startswith('|'):
content = read_contents.pop(0).lstrip()[1:] # removes '\s+|' prefix
content = content.rstrip('\n') # trailing newline
multiline_buffer.append(content)
if multiline_buffer:
self.set(key, '\n'.join(multiline_buffer), False)
continue
self.set(key, value, False)
def save(self, path = None):
"""
Saves configuration contents to disk. If a path is provided then it
replaces the configuration location that we track.
:param str path: location to be saved to
:raises: **ValueError** if no path was provided and we've never been provided one
"""
if path:
self._path = path
elif not self._path:
raise ValueError('Unable to save configuration: no path provided')
with self._contents_lock:
with open(self._path, 'w') as output_file:
for entry_key in sorted(self.keys()):
for entry_value in self.get_value(entry_key, multiple = True):
# check for multi line entries
if '\n' in entry_value:
entry_value = '\n|' + entry_value.replace('\n', '\n|')
output_file.write('%s %s\n' % (entry_key, entry_value))
def clear(self):
"""
Drops the configuration contents and reverts back to a blank, unloaded
state.
"""
with self._contents_lock:
self._contents.clear()
self._requested_keys = set()
def add_listener(self, listener, backfill = True):
"""
Registers the function to be notified of configuration updates. Listeners
are expected to be functors which accept (config, key).
:param functor listener: function to be notified when our configuration is changed
:param bool backfill: calls the function with our current values if **True**
"""
with self._contents_lock:
self._listeners.append(listener)
if backfill:
for key in self.keys():
listener(self, key)
def clear_listeners(self):
"""
Removes all attached listeners.
"""
self._listeners = []
def keys(self):
"""
Provides all keys in the currently loaded configuration.
:returns: **list** of strings for the configuration keys we've loaded
"""
return list(self._contents.keys())
def unused_keys(self):
"""
Provides the configuration keys that have never been provided to a caller
via :func:`~stem.util.conf.config_dict` or the
:func:`~stem.util.conf.Config.get` and
:func:`~stem.util.conf.Config.get_value` methods.
:returns: **set** of configuration keys we've loaded but have never been requested
"""
return set(self.keys()).difference(self._requested_keys)
def set(self, key, value, overwrite = True):
"""
Appends the given key/value configuration mapping, behaving the same as if
we'd loaded this from a configuration file.
:param str key: key for the configuration mapping
:param str,list value: value we're setting the mapping to
:param bool overwrite: replaces the previous value if **True**, otherwise
the values are appended
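For instance, appending a second value rather than overwriting...
::
>>> config = Config()
>>> config.set('startup.run', 'export PATH=$PATH:~/bin')
>>> config.set('startup.run', 'alias l=ls', False)
>>> config.get_value('startup.run', multiple = True)
['export PATH=$PATH:~/bin', 'alias l=ls']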
"""
with self._contents_lock:
if isinstance(value, str):
if not overwrite and key in self._contents:
self._contents[key].append(value)
else:
self._contents[key] = [value]
for listener in self._listeners:
listener(self, key)
elif isinstance(value, (list, tuple)):
if not overwrite and key in self._contents:
self._contents[key] += value
else:
self._contents[key] = value
for listener in self._listeners:
listener(self, key)
else:
raise ValueError("Config.set() only accepts str, list, or tuple. Provided value was a '%s'" % type(value))
def get(self, key, default = None):
"""
Fetches the given configuration, using the key and default value to
determine the type it should be. Recognized inferences are:
* **default is a boolean => boolean**
* values are case insensitive
* provides the default if the value isn't "true" or "false"
* **default is an integer => int**
* provides the default if the value can't be converted to an int
* **default is a float => float**
* provides the default if the value can't be converted to a float
* **default is a list => list**
* string contents for all configuration values with this key
* **default is a tuple => tuple**
* string contents for all configuration values with this key
* **default is a dictionary => dict**
* values without "=>" in them are ignored
* values are split into key/value pairs on "=>" with extra whitespace
stripped
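For example, with illustrative 'cache.size' and 'cache.enabled' entries (the
unparseable boolean falls back to its default)...
::
>>> config = Config()
>>> config.set('cache.size', '25')
>>> config.set('cache.enabled', 'maybe')
>>> config.get('cache.size', 10)
25
>>> config.get('cache.enabled', False)
False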
:param str key: config setting to be fetched
:param object default: value provided if no such key exists or fails to be converted
:returns: given configuration value with its type inferred with the above rules
"""
is_multivalue = isinstance(default, (list, tuple, dict))
val = self.get_value(key, default, is_multivalue)
if val == default:
return val # don't try to infer undefined values
if isinstance(default, bool):
if val.lower() == 'true':
val = True
elif val.lower() == 'false':
val = False
else:
log.debug("Config entry '%s' is expected to be a boolean, defaulting to '%s'" % (key, str(default)))
val = default
elif isinstance(default, int):
try:
val = int(val)
except ValueError:
log.debug("Config entry '%s' is expected to be an integer, defaulting to '%i'" % (key, default))
val = default
elif isinstance(default, float):
try:
val = float(val)
except ValueError:
log.debug("Config entry '%s' is expected to be a float, defaulting to '%f'" % (key, default))
val = default
elif isinstance(default, list):
val = list(val) # make a shallow copy
elif isinstance(default, tuple):
val = tuple(val)
elif isinstance(default, dict):
val_map = OrderedDict()
for entry in val:
if '=>' in entry:
entry_key, entry_val = entry.split('=>', 1)
val_map[entry_key.strip()] = entry_val.strip()
else:
log.debug('Ignoring invalid %s config entry (expected a mapping, but "%s" was missing "=>")' % (key, entry))
val = val_map
return val
def get_value(self, key, default = None, multiple = False):
"""
This provides the current value associated with a given key.
:param str key: config setting to be fetched
:param object default: value provided if no such key exists
:param bool multiple: provides back a list of all values if **True**,
otherwise this returns the last loaded configuration value
:returns: **str** or **list** of string configuration values associated
with the given key, providing the default if no such key exists
"""
with self._contents_lock:
if key in self._contents:
self._requested_keys.add(key)
if multiple:
return self._contents[key]
else:
return self._contents[key][-1]
else:
message_id = 'stem.util.conf.missing_config_key_%s' % key
log.log_once(message_id, log.TRACE, "config entry '%s' not found, defaulting to '%s'" % (key, default))
return default

stem/util/connection.py

@@ -0,0 +1,651 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Connection and networking based utility functions.
**Module Overview:**
::
get_connections - queries the connections belonging to a given process
system_resolvers - provides connection resolution methods that are likely to be available
port_usage - brief description of the common usage for a port
is_valid_ipv4_address - checks if a string is a valid IPv4 address
is_valid_ipv6_address - checks if a string is a valid IPv6 address
is_valid_port - checks if something is a valid representation for a port
is_private_address - checks if an IPv4 address belongs to a private range or not
expand_ipv6_address - provides an IPv6 address with its collapsed portions expanded
get_mask_ipv4 - provides the mask representation for a given number of bits
get_mask_ipv6 - provides the IPv6 mask representation for a given number of bits
.. data:: Resolver (enum)
Method for resolving a process' connections.
.. versionadded:: 1.1.0
.. versionchanged:: 1.4.0
Added **NETSTAT_WINDOWS**.
==================== ===========
Resolver Description
==================== ===========
**PROC** /proc contents
**NETSTAT** netstat
**NETSTAT_WINDOWS** netstat command under Windows
**SS** ss command
**LSOF** lsof command
**SOCKSTAT** sockstat command under *nix
**BSD_SOCKSTAT** sockstat command under FreeBSD
**BSD_PROCSTAT** procstat command under FreeBSD
==================== ===========
"""
import collections
import hashlib
import hmac
import os
import platform
import re
import stem.util.proc
import stem.util.system
from stem import str_type
from stem.util import conf, enum, log
# Connection resolution is risky to log about since it's highly likely to
# contain sensitive information. That said, it's also difficult to get right in
# a platform independent fashion. To opt into the logging required to
# troubleshoot connection resolution, set the following...
LOG_CONNECTION_RESOLUTION = False
Resolver = enum.Enum(
('PROC', 'proc'),
('NETSTAT', 'netstat'),
('NETSTAT_WINDOWS', 'netstat (windows)'),
('SS', 'ss'),
('LSOF', 'lsof'),
('SOCKSTAT', 'sockstat'),
('BSD_SOCKSTAT', 'sockstat (bsd)'),
('BSD_PROCSTAT', 'procstat (bsd)')
)
Connection = collections.namedtuple('Connection', [
'local_address',
'local_port',
'remote_address',
'remote_port',
'protocol',
])
FULL_IPv4_MASK = '255.255.255.255'
FULL_IPv6_MASK = 'FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF'
CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE = os.urandom(32)
PORT_USES = None # port number => description
RESOLVER_COMMAND = {
Resolver.PROC: '',
# -n = prevents dns lookups, -p = include process
Resolver.NETSTAT: 'netstat -np',
# -a = show all TCP/UDP connections, -n = numeric addresses and ports, -o = include pid
Resolver.NETSTAT_WINDOWS: 'netstat -ano',
# -n = numeric ports, -p = include process, -t = tcp sockets, -u = udp sockets
Resolver.SS: 'ss -nptu',
# -n = prevent dns lookups, -P = show port numbers (not names), -i = ip only, -w = no warnings
# (lsof provides a '-p <pid>' but oddly in practice it seems to be ~11-28% slower)
Resolver.LSOF: 'lsof -wnPi',
Resolver.SOCKSTAT: 'sockstat',
# -4 = IPv4, -c = connected sockets
Resolver.BSD_SOCKSTAT: 'sockstat -4c',
# -f <pid> = process pid
Resolver.BSD_PROCSTAT: 'procstat -f {pid}',
}
RESOLVER_FILTER = {
Resolver.PROC: '',
# tcp 0 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843/tor
Resolver.NETSTAT: '^{protocol}\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}/{name}\s*$',
# tcp 586 192.168.0.1:44284 38.229.79.2:443 ESTABLISHED 15843
Resolver.NETSTAT_WINDOWS: '^\s*{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED\s+{pid}\s*$',
# tcp ESTAB 0 0 192.168.0.20:44415 38.229.79.2:443 users:(("tor",15843,9))
Resolver.SS: '^{protocol}\s+ESTAB\s+.*\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+users:\(\("{name}",{pid},[0-9]+\)\)$',
# tor 3873 atagar 45u IPv4 40994 0t0 TCP 10.243.55.20:45724->194.154.227.109:9001 (ESTABLISHED)
Resolver.LSOF: '^{name}\s+{pid}\s+.*\s+{protocol}\s+{local_address}:{local_port}->{remote_address}:{remote_port} \(ESTABLISHED\)$',
# atagar tor 15843 tcp4 192.168.0.20:44092 68.169.35.102:443 ESTABLISHED
Resolver.SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}\s+ESTABLISHED$',
# _tor tor 4397 12 tcp4 172.27.72.202:54011 127.0.0.1:9001
Resolver.BSD_SOCKSTAT: '^\S+\s+{name}\s+{pid}\s+\S+\s+{protocol}4\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
# 3561 tor 4 s - rw---n-- 2 0 TCP 10.0.0.2:9050 10.0.0.1:22370
Resolver.BSD_PROCSTAT: '^\s*{pid}\s+{name}\s+.*\s+{protocol}\s+{local_address}:{local_port}\s+{remote_address}:{remote_port}$',
}
def get_connections(resolver, process_pid = None, process_name = None):
"""
Retrieves a list of the current connections for a given process. This
provides a list of Connection instances, which have five attributes...
* **local_address** (str)
* **local_port** (int)
* **remote_address** (str)
* **remote_port** (int)
* **protocol** (str, generally either 'tcp' or 'udp')
.. versionadded:: 1.1.0
:param Resolver resolver: method of connection resolution to use
:param int process_pid: pid of the process to retrieve
:param str process_name: name of the process to retrieve
:returns: **list** of Connection instances
:raises:
* **ValueError** if using **Resolver.PROC** or **Resolver.BSD_PROCSTAT**
and the process_pid wasn't provided
* **IOError** if no connections are available or resolution fails
(generally they're indistinguishable). The common causes are the
command being unavailable or permissions.
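For instance, listing tor's connections (a sketch; this assumes lsof is
available, and results vary by system)...
::
for conn in get_connections(Resolver.LSOF, process_name = 'tor'):
print('%s:%s -> %s:%s (%s)' % (conn.local_address, conn.local_port, conn.remote_address, conn.remote_port, conn.protocol))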
"""
def _log(msg):
if LOG_CONNECTION_RESOLUTION:
log.debug(msg)
_log('=' * 80)
_log('Querying connections for resolver: %s, pid: %s, name: %s' % (resolver, process_pid, process_name))
if isinstance(process_pid, str):
try:
process_pid = int(process_pid)
except ValueError:
raise ValueError('Process pid was non-numeric: %s' % process_pid)
if process_pid is None and process_name and resolver == Resolver.NETSTAT_WINDOWS:
process_pid = stem.util.system.pid_by_name(process_name)
if process_pid is None and resolver in (Resolver.NETSTAT_WINDOWS, Resolver.PROC, Resolver.BSD_PROCSTAT):
raise ValueError('%s resolution requires a pid' % resolver)
if resolver == Resolver.PROC:
return [Connection(*conn) for conn in stem.util.proc.connections(process_pid)]
resolver_command = RESOLVER_COMMAND[resolver].format(pid = process_pid)
try:
results = stem.util.system.call(resolver_command)
except OSError as exc:
raise IOError("Unable to query '%s': %s" % (resolver_command, exc))
resolver_regex_str = RESOLVER_FILTER[resolver].format(
protocol = '(?P<protocol>\S+)',
local_address = '(?P<local_address>[0-9.]+)',
local_port = '(?P<local_port>[0-9]+)',
remote_address = '(?P<remote_address>[0-9.]+)',
remote_port = '(?P<remote_port>[0-9]+)',
pid = process_pid if process_pid else '[0-9]*',
name = process_name if process_name else '\S*',
)
_log('Resolver regex: %s' % resolver_regex_str)
_log('Resolver results:\n%s' % '\n'.join(results))
connections = []
resolver_regex = re.compile(resolver_regex_str)
for line in results:
match = resolver_regex.match(line)
if match:
attr = match.groupdict()
local_addr = attr['local_address']
local_port = int(attr['local_port'])
remote_addr = attr['remote_address']
remote_port = int(attr['remote_port'])
protocol = attr['protocol'].lower()
if remote_addr == '0.0.0.0':
continue # procstat response for unestablished connections
if not (is_valid_ipv4_address(local_addr) and is_valid_ipv4_address(remote_addr)):
_log('Invalid address (%s or %s): %s' % (local_addr, remote_addr, line))
elif not (is_valid_port(local_port) and is_valid_port(remote_port)):
_log('Invalid port (%s or %s): %s' % (local_port, remote_port, line))
elif protocol not in ('tcp', 'udp'):
_log('Unrecognized protocol (%s): %s' % (protocol, line))
else:
conn = Connection(local_addr, local_port, remote_addr, remote_port, protocol)
connections.append(conn)
_log(str(conn))
_log('%i connections found' % len(connections))
if not connections:
raise IOError('No results found using: %s' % resolver_command)
return connections
def system_resolvers(system = None):
"""
Provides the types of connection resolvers likely to be available on this platform.
.. versionadded:: 1.1.0
.. versionchanged:: 1.3.0
Renamed from get_system_resolvers() to system_resolvers(). The old name
still works as an alias, but will be dropped in Stem version 2.0.0.
:param str system: system to get resolvers for, this is determined by
platform.system() if not provided
:returns: **list** of :data:`~stem.util.connection.Resolver` instances available on this platform
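For example, on a typical Linux host this might provide...
::
>>> system_resolvers()
['proc', 'netstat', 'lsof', 'ss']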
"""
if system is None:
if stem.util.system.is_gentoo():
system = 'Gentoo'
else:
system = platform.system()
if system == 'Windows':
resolvers = [Resolver.NETSTAT_WINDOWS]
elif system in ('Darwin', 'OpenBSD'):
resolvers = [Resolver.LSOF]
elif system == 'FreeBSD':
# Netstat is available, but lacks a '-p' equivalent so we can't associate
# the results to processes. The platform also has a ss command, but it
# belongs to a spreadsheet application.
resolvers = [Resolver.BSD_SOCKSTAT, Resolver.BSD_PROCSTAT, Resolver.LSOF]
else:
# Sockstat isn't available by default on ubuntu.
resolvers = [Resolver.NETSTAT, Resolver.SOCKSTAT, Resolver.LSOF, Resolver.SS]
# remove any that aren't in the user's PATH
resolvers = [r for r in resolvers if stem.util.system.is_available(RESOLVER_COMMAND[r])]
# proc resolution, by far, outperforms the others so default to this if able
if stem.util.proc.is_available() and os.access('/proc/net/tcp', os.R_OK) and os.access('/proc/net/udp', os.R_OK):
resolvers = [Resolver.PROC] + resolvers
return resolvers
def port_usage(port):
"""
Provides the common use of a given port. For example, 'HTTP' for port 80 or
'SSH' for 22.
.. versionadded:: 1.2.0
:param int port: port number to look up
:returns: **str** with a description for the port, **None** if none is known
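For instance...
::
>>> port_usage(80)
'HTTP'
>>> port_usage(22)
'SSH'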
"""
global PORT_USES
if PORT_USES is None:
config = conf.Config()
config_path = os.path.join(os.path.dirname(__file__), 'ports.cfg')
try:
config.load(config_path)
port_uses = {}
for key, value in config.get('port', {}).items():
if key.isdigit():
port_uses[int(key)] = value
elif '-' in key:
min_port, max_port = key.split('-', 1)
for port_entry in range(int(min_port), int(max_port) + 1):
port_uses[port_entry] = value
else:
raise ValueError("'%s' is an invalid key" % key)
PORT_USES = port_uses
except Exception as exc:
log.warn("BUG: stem failed to load its internal port descriptions from '%s': %s" % (config_path, exc))
if not PORT_USES:
return None
if isinstance(port, str) and port.isdigit():
port = int(port)
return PORT_USES.get(port)
def is_valid_ipv4_address(address):
"""
Checks if a string is a valid IPv4 address.
:param str address: string to be checked
:returns: **True** if input is a valid IPv4 address, **False** otherwise
"""
if not isinstance(address, (bytes, str_type)):
return False
# checks that there are four period separated values
if address.count('.') != 3:
return False
# checks that each octet is a decimal value between 0-255
for entry in address.split('.'):
if not entry.isdigit() or int(entry) < 0 or int(entry) > 255:
return False
elif entry[0] == '0' and len(entry) > 1:
return False # leading zeros, for instance in '1.2.3.001'
return True
def is_valid_ipv6_address(address, allow_brackets = False):
"""
Checks if a string is a valid IPv6 address.
:param str address: string to be checked
:param bool allow_brackets: ignore brackets which form '[address]'
:returns: **True** if input is a valid IPv6 address, **False** otherwise
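For instance...
::
>>> is_valid_ipv6_address('2001:db8::ff00:42:8329')
True
>>> is_valid_ipv6_address('[2001:db8::1]', allow_brackets = True)
True
>>> is_valid_ipv6_address('2001:db8:::1')
False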
"""
if allow_brackets:
if address.startswith('[') and address.endswith(']'):
address = address[1:-1]
# addresses are made up of eight colon separated groups of four hex digits
# with leading zeros being optional
# https://en.wikipedia.org/wiki/IPv6#Address_format
colon_count = address.count(':')
if colon_count > 7:
return False # too many groups
elif colon_count != 7 and '::' not in address:
return False # not enough groups and none are collapsed
elif address.count('::') > 1 or ':::' in address:
return False # multiple groupings of zeros can't be collapsed
for entry in address.split(':'):
if not re.match('^[0-9a-fA-F]{0,4}$', entry):
return False
return True
def is_valid_port(entry, allow_zero = False):
"""
Checks if a string or int is a valid port number.
:param list,str,int entry: string, integer or list to be checked
:param bool allow_zero: accept port number of zero (reserved by definition)
:returns: **True** if input is an integer and within the valid port range, **False** otherwise
"""
try:
value = int(entry)
if str(value) != str(entry):
return False # invalid leading char, e.g. space or zero
elif allow_zero and value == 0:
return True
else:
return value > 0 and value < 65536
except TypeError:
if isinstance(entry, (tuple, list)):
for port in entry:
if not is_valid_port(port, allow_zero):
return False
return True
else:
return False
except ValueError:
return False
def is_private_address(address):
"""
Checks if the IPv4 address is in a range belonging to the local network or
loopback. These include:
* Private ranges: 10.*, 172.16.* - 172.31.*, 192.168.*
* Loopback: 127.*
.. versionadded:: 1.1.0
:param str address: string to be checked
:returns: **True** if input is in a private range, **False** otherwise
:raises: **ValueError** if the address isn't a valid IPv4 address
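For instance...
::
>>> is_private_address('192.168.0.1')
True
>>> is_private_address('8.8.8.8')
False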
"""
if not is_valid_ipv4_address(address):
raise ValueError("'%s' isn't a valid IPv4 address" % address)
# checks for any of the simple wildcard ranges
if address.startswith('10.') or address.startswith('192.168.') or address.startswith('127.'):
return True
# checks for the 172.16.* - 172.31.* range
if address.startswith('172.'):
second_octet = int(address.split('.')[1])
if second_octet >= 16 and second_octet <= 31:
return True
return False
def expand_ipv6_address(address):
"""
Expands abbreviated IPv6 addresses to their full colon separated hex format.
For instance...
::
>>> expand_ipv6_address('2001:db8::ff00:42:8329')
'2001:0db8:0000:0000:0000:ff00:0042:8329'
>>> expand_ipv6_address('::')
'0000:0000:0000:0000:0000:0000:0000:0000'
:param str address: IPv6 address to be expanded
:raises: **ValueError** if the address can't be expanded due to being malformed
"""
if not is_valid_ipv6_address(address):
raise ValueError("'%s' isn't a valid IPv6 address" % address)
# expands collapsed groupings, there can only be a single '::' in a valid
# address
if '::' in address:
missing_groups = 7 - address.count(':')
address = address.replace('::', '::' + ':' * missing_groups)
# inserts missing zeros
for index in range(8):
start = index * 5
end = address.index(':', start) if index != 7 else len(address)
missing_zeros = 4 - (end - start)
if missing_zeros > 0:
address = address[:start] + '0' * missing_zeros + address[start:]
return address
def get_mask_ipv4(bits):
"""
Provides the IPv4 mask for a given number of bits, in the dotted-quad format.
:param int bits: number of bits to be converted
:returns: **str** with the subnet mask representation for this many bits
:raises: **ValueError** if given a number of bits outside the range of 0-32
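For instance...
::
>>> get_mask_ipv4(24)
'255.255.255.0'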
"""
if bits > 32 or bits < 0:
raise ValueError('A mask can only be 0-32 bits, got %i' % bits)
elif bits == 32:
return FULL_IPv4_MASK
# get the binary representation of the mask
mask_bin = _get_binary(2 ** bits - 1, 32)[::-1]
# breaks it into eight character groupings
octets = [mask_bin[8 * i:8 * (i + 1)] for i in range(4)]
# converts each octet into its integer value
return '.'.join([str(int(octet, 2)) for octet in octets])
def get_mask_ipv6(bits):
"""
Provides the IPv6 mask for a given number of bits, in the hex colon-delimited
format.
:param int bits: number of bits to be converted
:returns: **str** with the subnet mask representation for this many bits
:raises: **ValueError** if given a number of bits outside the range of 0-128
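For instance...
::
>>> get_mask_ipv6(64)
'FFFF:FFFF:FFFF:FFFF:0000:0000:0000:0000'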
"""
if bits > 128 or bits < 0:
raise ValueError('A mask can only be 0-128 bits, got %i' % bits)
elif bits == 128:
return FULL_IPv6_MASK
# get the binary representation of the mask
mask_bin = _get_binary(2 ** bits - 1, 128)[::-1]
# breaks it into sixteen character groupings
groupings = [mask_bin[16 * i:16 * (i + 1)] for i in range(8)]
# converts each group into its hex value
return ':'.join(['%04x' % int(group, 2) for group in groupings]).upper()
def _get_masked_bits(mask):
"""
Provides the number of bits that an IPv4 subnet mask represents. Note that
not all masks can be represented by a bit count.
:param str mask: mask to be converted
:returns: **int** with the number of bits represented by the mask
:raises: **ValueError** if the mask is invalid or can't be converted
"""
if not is_valid_ipv4_address(mask):
raise ValueError("'%s' is an invalid subnet mask" % mask)
# converts octets to binary representation
mask_bin = _get_address_binary(mask)
mask_match = re.match('^(1*)(0*)$', mask_bin)
if mask_match:
return 32 - len(mask_match.groups()[1])
else:
raise ValueError('Unable to convert mask to a bit count: %s' % mask)
def _get_binary(value, bits):
"""
Provides the given value as a binary string, padded with zeros to the given
number of bits.
:param int value: value to be converted
:param int bits: number of bits to pad to
"""
# http://www.daniweb.com/code/snippet216539.html
return ''.join([str((value >> y) & 1) for y in range(bits - 1, -1, -1)])
def _get_address_binary(address):
"""
Provides the binary value for an IPv4 or IPv6 address.
:returns: **str** with the binary representation of this address
:raises: **ValueError** if address is neither an IPv4 nor IPv6 address
"""
if is_valid_ipv4_address(address):
return ''.join([_get_binary(int(octet), 8) for octet in address.split('.')])
elif is_valid_ipv6_address(address):
address = expand_ipv6_address(address)
return ''.join([_get_binary(int(grouping, 16), 16) for grouping in address.split(':')])
else:
raise ValueError("'%s' is neither an IPv4 or IPv6 address" % address)
def _hmac_sha256(key, msg):
"""
Generates a sha256 digest using the given key and message.
:param str key: starting key for the hash
:param str msg: message to be hashed
:returns: sha256 digest of msg as bytes, hashed using the given key
"""
return hmac.new(key, msg, hashlib.sha256).digest()
def _cryptovariables_equal(x, y):
"""
Compares two strings for equality securely.
:param str x: string to be compared.
:param str y: the other string to be compared.
:returns: **True** if both strings are equal, **False** otherwise.
"""
return (
_hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, x) ==
_hmac_sha256(CRYPTOVARIABLE_EQUALITY_COMPARISON_NONCE, y))
# TODO: drop with stem 2.x
# We renamed our methods to drop a redundant 'get_*' prefix, so alias the old
# names for backward compatibility.
get_system_resolvers = system_resolvers

stem/util/enum.py

@@ -0,0 +1,172 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Basic enumeration, providing ordered types for collections. These can be
constructed as simple type listings...
::
>>> from stem.util import enum
>>> insects = enum.Enum('ANT', 'WASP', 'LADYBUG', 'FIREFLY')
>>> insects.ANT
'Ant'
>>> tuple(insects)
('Ant', 'Wasp', 'Ladybug', 'Firefly')
... or with overwritten string counterparts...
::
>>> from stem.util import enum
>>> pets = enum.Enum(('DOG', 'Skippy'), 'CAT', ('FISH', 'Nemo'))
>>> pets.DOG
'Skippy'
>>> pets.CAT
'Cat'
**Module Overview:**
::
UppercaseEnum - Provides an enum instance with capitalized values
Enum - Provides a basic, ordered enumeration
|- keys - string representation of our enum keys
|- index_of - index of an enum value
|- next - provides the enum after a given enum value
|- previous - provides the enum before a given value
|- __getitem__ - provides the value for an enum key
+- __iter__ - iterator over our enum keys
"""
from stem import str_type
def UppercaseEnum(*args):
"""
Provides an :class:`~stem.util.enum.Enum` instance where the values are
identical to the keys. Since the keys are uppercase by convention this means
the values are too. For instance...
::
>>> from stem.util import enum
>>> runlevels = enum.UppercaseEnum('DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERROR')
>>> runlevels.DEBUG
'DEBUG'
:param list args: enum keys to initialize with
:returns: :class:`~stem.util.enum.Enum` instance with the given keys
"""
return Enum(*[(v, v) for v in args])
class Enum(object):
"""
Basic enumeration.
"""
def __init__(self, *args):
from stem.util.str_tools import _to_camel_case
# ordered listings of our keys and values
keys, values = [], []
for entry in args:
if isinstance(entry, (bytes, str_type)):
key, val = entry, _to_camel_case(entry)
elif isinstance(entry, tuple) and len(entry) == 2:
key, val = entry
else:
raise ValueError('Unrecognized input: %s' % args)
keys.append(key)
values.append(val)
setattr(self, key, val)
self._keys = tuple(keys)
self._values = tuple(values)
def keys(self):
"""
Provides an ordered listing of the enumeration keys in this set.
:returns: **list** with our enum keys
"""
return list(self._keys)
def index_of(self, value):
"""
Provides the index of the given value in the collection.
:param str value: entry to be looked up
:returns: **int** index of the given entry
:raises: **ValueError** if no such element exists
"""
return self._values.index(value)
def next(self, value):
"""
Provides the next enumeration after the given value.
:param str value: enumeration for which to get the next entry
:returns: enum value following the given entry
:raises: **ValueError** if no such element exists
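For instance, note that this wraps around at the end of the collection...
::
>>> from stem.util import enum
>>> insects = enum.Enum('ANT', 'WASP')
>>> insects.next(insects.ANT)
'Wasp'
>>> insects.next(insects.WASP)
'Ant'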
"""
if value not in self._values:
raise ValueError('No such enumeration exists: %s (options: %s)' % (value, ', '.join(self._values)))
next_index = (self._values.index(value) + 1) % len(self._values)
return self._values[next_index]
def previous(self, value):
"""
Provides the previous enumeration before the given value.
:param str value: enumeration for which to get the previous entry
:returns: enum value preceding the given entry
:raises: **ValueError** if no such element exists
"""
if value not in self._values:
raise ValueError('No such enumeration exists: %s (options: %s)' % (value, ', '.join(self._values)))
prev_index = (self._values.index(value) - 1) % len(self._values)
return self._values[prev_index]
def __getitem__(self, item):
"""
Provides the value for the given key.
:param str item: key to be looked up
:returns: **str** with the value for the given key
:raises: **ValueError** if the key doesn't exist
"""
if item in vars(self):
return getattr(self, item)
else:
keys = ', '.join(self.keys())
raise ValueError("'%s' isn't among our enumeration keys, which includes: %s" % (item, keys))
def __iter__(self):
"""
Provides an ordered listing of the enums in this set.
"""
for entry in self._values:
yield entry

stem/util/log.py

@@ -0,0 +1,253 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Functions to aid library logging. The default logging
:data:`~stem.util.log.Runlevel` is usually NOTICE and above.
**Stem users are more than welcome to listen for stem events, but these
functions are not being vended to our users. They may change in the future, use
them at your own risk.**
**Module Overview:**
::
get_logger - provides stem's Logger instance
logging_level - converts a runlevel to its logging number
escape - escapes special characters in a message in preparation for logging
log - logs a message at the given runlevel
log_once - logs a message, deduplicating if it has already been logged
trace - logs a message at the TRACE runlevel
debug - logs a message at the DEBUG runlevel
info - logs a message at the INFO runlevel
notice - logs a message at the NOTICE runlevel
warn - logs a message at the WARN runlevel
error - logs a message at the ERROR runlevel
LogBuffer - Buffers logged events so they can be iterated over.
|- is_empty - checks if there's events in our buffer
+- __iter__ - iterates over and removes the buffered events
log_to_stdout - reports further logged events to stdout
.. data:: Runlevel (enum)
Enumeration for logging runlevels.
========== ===========
Runlevel Description
========== ===========
**ERROR** critical issue occurred, the user needs to be notified
**WARN** non-critical issue occurred that the user should be aware of
**NOTICE** information that is helpful to the user
**INFO** high level library activity
**DEBUG** low level library activity
**TRACE** request/reply logging
========== ===========
"""
import logging
import stem.prereq
import stem.util.enum
import stem.util.str_tools
# Logging runlevels. These are *very* commonly used so we include shorter
# aliases (so they can be referenced as log.DEBUG, log.WARN, etc).
Runlevel = stem.util.enum.UppercaseEnum('TRACE', 'DEBUG', 'INFO', 'NOTICE', 'WARN', 'ERROR')
TRACE, DEBUG, INFO, NOTICE, WARN, ERR = list(Runlevel)
# mapping of runlevels to the logger module's values, TRACE and DEBUG aren't
# built into the module
LOG_VALUES = {
Runlevel.TRACE: logging.DEBUG - 5,
Runlevel.DEBUG: logging.DEBUG,
Runlevel.INFO: logging.INFO,
Runlevel.NOTICE: logging.INFO + 5,
Runlevel.WARN: logging.WARN,
Runlevel.ERROR: logging.ERROR,
}
logging.addLevelName(LOG_VALUES[TRACE], 'TRACE')
logging.addLevelName(LOG_VALUES[NOTICE], 'NOTICE')
LOGGER = logging.getLogger('stem')
LOGGER.setLevel(LOG_VALUES[TRACE])
# There's some messages that we don't want to log more than once. This set has
# the messages IDs that we've logged which fall into this category.
DEDUPLICATION_MESSAGE_IDS = set()
# Adds a default nullhandler for the stem logger, suppressing the 'No handlers
# could be found for logger "stem"' warning as per...
# http://docs.python.org/release/3.1.3/library/logging.html#configuring-logging-for-a-library
class _NullHandler(logging.Handler):
def emit(self, record):
pass
if not LOGGER.handlers:
LOGGER.addHandler(_NullHandler())
def get_logger():
"""
Provides the stem logger.
:return: **logging.Logger** for stem
"""
return LOGGER
def logging_level(runlevel):
"""
Translates a runlevel into the value expected by the logging module.
:param stem.util.log.Runlevel runlevel: runlevel to be returned, no logging if **None**
"""
if runlevel:
return LOG_VALUES[runlevel]
else:
return logging.FATAL + 5
def escape(message):
"""
Escapes specific sequences for logging (newlines, tabs, carriage returns). If
the input is **bytes** then this converts it to **unicode** under python 3.x.
:param str message: string to be escaped
:returns: str that is escaped
"""
if stem.prereq.is_python_3():
message = stem.util.str_tools._to_unicode(message)
for pattern, replacement in (('\n', '\\n'), ('\r', '\\r'), ('\t', '\\t')):
message = message.replace(pattern, replacement)
return message
def log(runlevel, message):
"""
Logs a message at the given runlevel.
:param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
:param str message: message to be logged
"""
if runlevel:
LOGGER.log(LOG_VALUES[runlevel], message)
def log_once(message_id, runlevel, message):
"""
Logs a message at the given runlevel. If a message with this ID has already
been logged then this is a no-op.
:param str message_id: unique message identifier to deduplicate on
:param stem.util.log.Runlevel runlevel: runlevel to log the message at, logging is skipped if **None**
:param str message: message to be logged
:returns: **True** if we log the message, **False** otherwise
"""
if not runlevel or message_id in DEDUPLICATION_MESSAGE_IDS:
return False
else:
DEDUPLICATION_MESSAGE_IDS.add(message_id)
log(runlevel, message)
return True
# shorter aliases for logging at a runlevel
def trace(message):
log(Runlevel.TRACE, message)
def debug(message):
log(Runlevel.DEBUG, message)
def info(message):
log(Runlevel.INFO, message)
def notice(message):
log(Runlevel.NOTICE, message)
def warn(message):
log(Runlevel.WARN, message)
def error(message):
log(Runlevel.ERROR, message)
class LogBuffer(logging.Handler):
"""
Basic log handler that listens for stem events and stores them so they can be
read later. Log entries are cleared as they are read.
.. versionchanged:: 1.4.0
Added the yield_records argument.
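For instance (the timestamp below is illustrative)...
::
>>> import stem.util.log as log
>>> buffer = log.LogBuffer(log.Runlevel.INFO)
>>> log.get_logger().addHandler(buffer)
>>> log.info('hello world')
>>> for entry in buffer:
...   print(entry)
05/29/2015 12:00:00 [INFO] hello world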
"""
def __init__(self, runlevel, yield_records = False):
# TODO: At least in python 2.6 logging.Handler has a bug in that it doesn't
# extend object, causing our super() call to fail. When we drop python 2.6
# support we should switch back to using super() instead.
#
# super(LogBuffer, self).__init__(level = logging_level(runlevel))
logging.Handler.__init__(self, level = logging_level(runlevel))
self.formatter = logging.Formatter(
fmt = '%(asctime)s [%(levelname)s] %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S')
self._buffer = []
self._yield_records = yield_records
def is_empty(self):
return not bool(self._buffer)
def __iter__(self):
while self._buffer:
record = self._buffer.pop(0)
yield record if self._yield_records else self.formatter.format(record)
def emit(self, record):
self._buffer.append(record)
class _StdoutLogger(logging.Handler):
def __init__(self, runlevel):
logging.Handler.__init__(self, level = logging_level(runlevel))
self.formatter = logging.Formatter(
fmt = '%(asctime)s [%(levelname)s] %(message)s',
datefmt = '%m/%d/%Y %H:%M:%S')
def emit(self, record):
print(self.formatter.format(record))
def log_to_stdout(runlevel):
"""
Logs further events to stdout.
:param stem.util.log.Runlevel runlevel: minimum runlevel a message needs to be to be logged
"""
get_logger().addHandler(_StdoutLogger(runlevel))

stem/util/lru_cache.py

@@ -0,0 +1,182 @@
# Drop-in replacement for python 3.2's functools.lru_cache, from...
# http://code.activestate.com/recipes/578078-py26-and-py30-backport-of-python-33s-lru-cache/
#
# ... which is under the MIT license. Stem users should *not* rely upon this
# module. It will be removed when we drop support for python 3.2 and below.
"""
Memoization decorator that caches a function's return value. If later called
with the same arguments then the cached value is returned rather than
reevaluated.
This is a python 2.x port of `functools.lru_cache
<http://docs.python.org/3/library/functools.html#functools.lru_cache>`_. If
using python 3.2 or later you should use that instead.
"""
from collections import namedtuple
from functools import update_wrapper
from threading import RLock
_CacheInfo = namedtuple('CacheInfo', ['hits', 'misses', 'maxsize', 'currsize'])
class _HashedSeq(list):
__slots__ = 'hashvalue'
def __init__(self, tup, hash=hash):
self[:] = tup
self.hashvalue = hash(tup)
def __hash__(self):
return self.hashvalue
def _make_key(args, kwds, typed,
kwd_mark = (object(),),
fasttypes = set([int, str, frozenset, type(None)]),
sorted=sorted, tuple=tuple, type=type, len=len):
'Make a cache key from optionally typed positional and keyword arguments'
key = args
if kwds:
sorted_items = sorted(kwds.items())
key += kwd_mark
for item in sorted_items:
key += item
if typed:
key += tuple(type(v) for v in args)
if kwds:
key += tuple(type(v) for k, v in sorted_items)
elif len(key) == 1 and type(key[0]) in fasttypes:
return key[0]
return _HashedSeq(key)
def lru_cache(maxsize=100, typed=False):
"""Least-recently-used cache decorator.
If *maxsize* is set to None, the LRU features are disabled and the cache
can grow without bound.
If *typed* is True, arguments of different types will be cached separately.
For example, f(3.0) and f(3) will be treated as distinct calls with
distinct results.
Arguments to the cached function must be hashable.
View the cache statistics named tuple (hits, misses, maxsize, currsize) with
f.cache_info(). Clear the cache and statistics with f.cache_clear().
Access the underlying function with f.__wrapped__.
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
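For example (the hit/miss counts below follow from the call pattern)...
>>> @lru_cache(maxsize = 100)
... def fib(n):
...   return n if n < 2 else fib(n - 1) + fib(n - 2)
>>> fib(10)
55
>>> fib.cache_info()
CacheInfo(hits=8, misses=11, maxsize=100, currsize=11)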
"""
# Users should only access the lru_cache through its public API:
# cache_info, cache_clear, and f.__wrapped__
# The internals of the lru_cache are encapsulated for thread safety and
# to allow the implementation to change (including a possible C version).
def decorating_function(user_function):
cache = dict()
stats = [0, 0] # make statistics updateable non-locally
HITS, MISSES = 0, 1 # names for the stats fields
make_key = _make_key
cache_get = cache.get # bound method to lookup key or return None
_len = len # localize the global len() function
lock = RLock() # because linkedlist updates aren't threadsafe
root = [] # root of the circular doubly linked list
root[:] = [root, root, None, None] # initialize by pointing to self
nonlocal_root = [root] # make updateable non-locally
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
if maxsize == 0:
def wrapper(*args, **kwds):
# no caching, just do a statistics update after a successful call
result = user_function(*args, **kwds)
stats[MISSES] += 1
return result
elif maxsize is None:
def wrapper(*args, **kwds):
# simple caching without ordering or size limit
key = make_key(args, kwds, typed)
result = cache_get(key, root) # root used here as a unique not-found sentinel
if result is not root:
stats[HITS] += 1
return result
result = user_function(*args, **kwds)
cache[key] = result
stats[MISSES] += 1
return result
else:
def wrapper(*args, **kwds):
# size limited caching that tracks accesses by recency
key = make_key(args, kwds, typed) if kwds or typed else args
with lock:
link = cache_get(key)
if link is not None:
# record recent use of the key by moving it to the front of the list
root, = nonlocal_root
link_prev, link_next, key, result = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = root[PREV]
last[NEXT] = root[PREV] = link
link[PREV] = last
link[NEXT] = root
stats[HITS] += 1
return result
result = user_function(*args, **kwds)
with lock:
root, = nonlocal_root
if key in cache:
# getting here means that this same key was added to the
# cache while the lock was released. since the link
# update is already done, we need only return the
# computed result and update the count of misses.
pass
elif _len(cache) >= maxsize:
# use the old root to store the new key and result
oldroot = root
oldroot[KEY] = key
oldroot[RESULT] = result
# empty the oldest link and make it the new root
root = nonlocal_root[0] = oldroot[NEXT]
oldkey = root[KEY]
root[KEY] = root[RESULT] = None
# now update the cache dictionary for the new links
del cache[oldkey]
cache[key] = oldroot
else:
# put result in a new link at the front of the list
last = root[PREV]
link = [last, root, key, result]
last[NEXT] = root[PREV] = cache[key] = link
stats[MISSES] += 1
return result
def cache_info():
"""Report cache statistics"""
with lock:
return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
def cache_clear():
"""Clear the cache and cache statistics"""
with lock:
cache.clear()
root = nonlocal_root[0]
root[:] = [root, root, None, None]
stats[:] = [0, 0]
wrapper.__wrapped__ = user_function
wrapper.cache_info = cache_info
wrapper.cache_clear = cache_clear
return update_wrapper(wrapper, user_function)
return decorating_function
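# A hedged usage sketch for this backport, assuming it's imported under the
# stem.util.lru_cache name that stem vends it as. The decorator mirrors
# python 3.2's functools.lru_cache...

from stem.util.lru_cache import lru_cache

@lru_cache(maxsize = 128)
def fibonacci(n):
  return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)

print(fibonacci(80))           # memoization makes this linear rather than exponential
print(fibonacci.cache_info())  # CacheInfo(hits=78, misses=81, maxsize=128, currsize=81)
fibonacci.cache_clear()        # drops both the cache and its statistics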

View file

@ -0,0 +1,133 @@
# Drop in replacement for python 2.7's OrderedDict, from...
# http://pypi.python.org/pypi/ordereddict
#
# Stem users should *not* rely upon this module. It will be removed when we
# drop support for python 2.6 and below.
# Copyright (c) 2009 Raymond Hettinger
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
from UserDict import DictMixin
class OrderedDict(dict, DictMixin):
def __init__(self, *args, **kwds):
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' % len(args))
try:
self.__end
except AttributeError:
self.clear()
self.update(*args, **kwds)
def clear(self):
self.__end = end = []
end += [None, end, end] # sentinel node for doubly linked list
self.__map = {} # key --> [key, prev, next]
dict.clear(self)
def __setitem__(self, key, value):
if key not in self:
end = self.__end
curr = end[1]
curr[2] = end[1] = self.__map[key] = [key, curr, end]
dict.__setitem__(self, key, value)
def __delitem__(self, key):
dict.__delitem__(self, key)
key, prev, next = self.__map.pop(key)
prev[2] = next
next[1] = prev
def __iter__(self):
end = self.__end
curr = end[2]
while curr is not end:
yield curr[0]
curr = curr[2]
def __reversed__(self):
end = self.__end
curr = end[1]
while curr is not end:
yield curr[0]
curr = curr[1]
def popitem(self, last=True):
if not self:
raise KeyError('dictionary is empty')
if last:
key = reversed(self).next()
else:
key = iter(self).next()
value = self.pop(key)
return key, value
def __reduce__(self):
items = [[k, self[k]] for k in self]
tmp = self.__map, self.__end
del self.__map, self.__end
inst_dict = vars(self).copy()
self.__map, self.__end = tmp
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def keys(self):
return list(self)
setdefault = DictMixin.setdefault
update = DictMixin.update
pop = DictMixin.pop
values = DictMixin.values
items = DictMixin.items
iterkeys = DictMixin.iterkeys
itervalues = DictMixin.itervalues
iteritems = DictMixin.iteritems
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.items())
def copy(self):
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
def __eq__(self, other):
if isinstance(other, OrderedDict):
if len(self) != len(other):
return False
for p, q in zip(self.items(), other.items()):
if p != q:
return False
return True
return dict.__eq__(self, other)
def __ne__(self, other):
return not self == other
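# A small sketch of the drop-in behavior. Iteration follows insertion order
# rather than hash order, matching python 2.7's collections.OrderedDict...

d = OrderedDict()
d['banana'] = 3
d['apple'] = 4
d['pear'] = 1

print(d.keys())          # ['banana', 'apple', 'pear'], insertion order preserved
d.popitem()              # removes ('pear', 1), the most recently added entry
d.popitem(last = False)  # removes ('banana', 3), the oldest entry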

View file

@ -0,0 +1,313 @@
################################################################################
#
# Common usages for ports. This is based on...
#
# https://secure.wikimedia.org/wikipedia/en/wiki/List_of_TCP_and_UDP_port_numbers
# http://isc.sans.edu/services.html
#
################################################################################
port 1 => TCPMUX
port 2 => CompressNET
port 3 => CompressNET
port 5 => RJE
port 7 => Echo
port 9 => Discard
port 11 => SYSTAT
port 13 => Daytime
port 15 => netstat
port 17 => QOTD
port 18 => MSP
port 19 => CHARGEN
port 20 => FTP
port 21 => FTP
port 22 => SSH
port 23 => Telnet
port 24 => Priv-mail
port 25 => SMTP
port 34 => RF
port 35 => Printer
port 37 => TIME
port 39 => RLP
port 41 => Graphics
port 42 => WINS
port 43 => WHOIS
port 47 => NI FTP
port 49 => TACACS
port 50 => Remote Mail
port 51 => IMP
port 52 => XNS
port 53 => DNS
port 54 => XNS
port 55 => ISI-GL
port 56 => RAP
port 57 => MTP
port 58 => XNS
port 67 => BOOTP
port 68 => BOOTP
port 69 => TFTP
port 70 => Gopher
port 79 => Finger
port 80 => HTTP
port 81 => HTTP Alternate
port 82 => Torpark
port 83 => MIT ML
port 88 => Kerberos
port 90 => dnsix
port 99 => WIP
port 101 => NIC
port 102 => ISO-TSAP
port 104 => ACR/NEMA
port 105 => CCSO
port 107 => Telnet
port 108 => SNA
port 109 => POP2
port 110 => POP3
port 111 => ONC RPC
port 113 => ident
port 115 => SFTP
port 117 => UUCP
port 118 => SQL
port 119 => NNTP
port 123 => NTP
port 135 => DCE
port 137 => NetBIOS
port 138 => NetBIOS
port 139 => NetBIOS
port 143 => IMAP
port 152 => BFTP
port 153 => SGMP
port 156 => SQL
port 158 => DMSP
port 161 => SNMP
port 162 => SNMPTRAP
port 170 => Print-srv
port 177 => XDMCP
port 179 => BGP
port 194 => IRC
port 199 => SMUX
port 201 => AppleTalk
port 209 => QMTP
port 210 => ANSI
port 213 => IPX
port 218 => MPP
port 220 => IMAP
port 256 => 2DEV
port 259 => ESRO
port 264 => BGMP
port 308 => Novastor
port 311 => OSX Admin
port 318 => PKIX TSP
port 319 => PTP
port 320 => PTP
port 323 => IMMP
port 350 => MATIP
port 351 => MATIP
port 366 => ODMR
port 369 => Rpc2portmap
port 370 => codaauth2
port 371 => ClearCase
port 383 => HP Alarm Mgr
port 384 => ARNS
port 387 => AURP
port 389 => LDAP
port 401 => UPS
port 402 => Altiris
port 427 => SLP
port 443 => HTTPS
port 444 => SNPP
port 445 => SMB
port 464 => Kerberos (kpasswd)
port 465 => SMTP
port 475 => tcpnethaspsrv
port 497 => Retrospect
port 500 => ISAKMP
port 501 => STMF
port 502 => Modbus
port 504 => Citadel
port 510 => FirstClass
port 512 => Rexec
port 513 => rlogin
port 514 => rsh
port 515 => LPD
port 517 => Talk
port 518 => NTalk
port 520 => efs
port 524 => NCP
port 530 => RPC
port 531 => AIM/IRC
port 532 => netnews
port 533 => netwall
port 540 => UUCP
port 542 => commerce
port 543 => Kerberos (klogin)
port 544 => Kerberos (kshell)
port 545 => OSISoft PI
port 546 => DHCPv6
port 547 => DHCPv6
port 548 => AFP
port 550 => new-who
port 554 => RTSP
port 556 => RFS
port 560 => rmonitor
port 561 => monitor
port 563 => NNTPS
port 587 => SMTP
port 591 => FileMaker
port 593 => HTTP RPC
port 604 => TUNNEL
port 623 => ASF-RMCP
port 631 => CUPS
port 635 => RLZ DBase
port 636 => LDAPS
port 639 => MSDP
port 641 => SupportSoft
port 646 => LDP
port 647 => DHCP
port 648 => RRP
port 651 => IEEE-MMS
port 652 => DTCP
port 653 => SupportSoft
port 654 => MMS/MMP
port 657 => RMC
port 660 => OSX Admin
port 665 => sun-dr
port 666 => Doom
port 674 => ACAP
port 691 => MS Exchange
port 692 => Hyperwave-ISP
port 694 => Linux-HA
port 695 => IEEE-MMS-SSL
port 698 => OLSR
port 699 => Access Network
port 700 => EPP
port 701 => LMP
port 702 => IRIS
port 706 => SILC
port 711 => MPLS
port 712 => TBRPF
port 720 => SMQP
port 749 => Kerberos (admin)
port 750 => rfile
port 751 => pump
port 752 => qrh
port 753 => rrh
port 754 => tell send
port 760 => ns
port 782 => Conserver
port 783 => spamd
port 829 => CMP
port 843 => Flash
port 847 => DHCP
port 860 => iSCSI
port 873 => rsync
port 888 => CDDB
port 901 => SWAT
port 902-904 => VMware
port 911 => NCA
port 953 => DNS RNDC
port 981 => SofaWare Firewall
port 989 => FTPS
port 990 => FTPS
port 991 => NAS
port 992 => Telnets
port 993 => IMAPS
port 994 => IRCS
port 995 => POP3S
port 999 => ScimoreDB
port 1001 => JtoMB
port 1002 => cogbot
port 1080 => SOCKS
port 1085 => WebObjects
port 1109 => KPOP
port 1169 => Tripwire
port 1194 => OpenVPN
port 1214 => Kazaa
port 1220 => QuickTime
port 1234 => VLC
port 1241 => Nessus
port 1270 => SCOM
port 1293 => IPSec
port 1433 => MSSQL
port 1434 => MSSQL
port 1500 => NetGuard
port 1503 => MSN
port 1512 => WINS
port 1521 => Oracle
port 1526 => Oracle
port 1533 => Sametime
port 1666 => Perforce
port 1677 => GroupWise
port 1723 => PPTP
port 1725 => Steam
port 1863 => MSNP
port 2049 => NFS
port 2082 => Infowave
port 2083 => radsec
port 2086 => GNUnet
port 2087 => ELI
port 2095 => NBX SER
port 2096 => NBX DIR
port 2102-2104 => Zephyr
port 2401 => CVS
port 2525 => SMTP
port 2710 => BitTorrent
port 3074 => XBox LIVE
port 3101 => BlackBerry
port 3128 => SQUID
port 3306 => MySQL
port 3389 => WBT
port 3690 => SVN
port 3723 => Battle.net
port 3724 => WoW
port 4321 => RWHOIS
port 4643 => Virtuozzo
port 4662 => eMule
port 5003 => FileMaker
port 5050 => Yahoo IM
port 5060 => SIP
port 5061 => SIP
port 5190 => AIM/ICQ
port 5222 => Jabber
port 5223 => Jabber
port 5228 => Android Market
port 5269 => Jabber
port 5298 => Jabber
port 5432 => PostgreSQL
port 5500 => VNC
port 5556 => Freeciv
port 5666 => NRPE
port 5667 => NSCA
port 5800 => VNC
port 5900 => VNC
port 6346 => gnutella
port 6347 => gnutella
port 6660-6669 => IRC
port 6679 => IRC
port 6697 => IRC
port 6881-6999 => BitTorrent
port 8000 => iRDMI
port 8008 => HTTP Alternate
port 8010 => XMPP
port 8074 => Gadu-Gadu
port 8080 => HTTP Proxy
port 8087 => SPP
port 8088 => Radan HTTP
port 8118 => Privoxy
port 8123 => Polipo
port 8332-8333 => Bitcoin
port 8443 => PCsync HTTPS
port 8888 => NewsEDGE
port 9030 => Tor
port 9050 => Tor
port 9051 => Tor
port 9418 => Git
port 9999 => distinct
port 10000 => Webmin
port 19294 => Google Voice
port 19638 => Ensim
port 23399 => Skype
port 30301 => BitTorrent
port 33434 => traceroute
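# Not part of stem's API, but a hedged sketch of how entries in this file can
# be read: single ports ('port 80 => HTTP') and ranges ('port 902-904 =>
# VMware') both map port numbers to a usage label...

def parse_port_usage(config_path):
  usage = {}

  with open(config_path) as config_file:
    for line in config_file:
      line = line.split('#')[0].strip()  # drop comments and surrounding whitespace

      if not line.startswith('port '):
        continue

      ports, label = [entry.strip() for entry in line[5:].split('=>')]

      if '-' in ports:
        start, end = ports.split('-')
        for port in range(int(start), int(end) + 1):
          usage[port] = label
      else:
        usage[int(ports)] = label

  return usage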

View file

@ -0,0 +1,547 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Helper functions for querying process and system information from the /proc
contents. Fetching information this way provides huge performance benefits
over lookups via system utilities (ps, netstat, etc). For instance, resolving
connections this way cuts the runtime by around 90% versus the alternatives.
These functions may not work on all platforms (only Linux?).
The method for reading these files (and a little code) are borrowed from
`psutil <https://code.google.com/p/psutil/>`_, which was written by Jay Loden,
Dave Daeschler, Giampaolo Rodola' and is under the BSD license.
**These functions are not being vended to stem users. They may change in the
future, use them at your own risk.**
.. versionchanged:: 1.3.0
Dropped the get_* prefix from several function names. The old names still
work, but are deprecated aliases.
**Module Overview:**
::
is_available - checks if proc utilities can be used on this system
system_start_time - unix timestamp for when the system started
physical_memory - memory available on this system
cwd - provides the current working directory for a process
uid - provides the user id a process is running under
memory_usage - provides the memory usage of a process
stats - queries statistics about a process
file_descriptors_used - number of file descriptors used by a process
connections - provides the connections made by a process
.. data:: Stat (enum)
Types of data available via the :func:`~stem.util.proc.stats` function.
============== ===========
Stat Description
============== ===========
**COMMAND** command name under which the process is running
**CPU_UTIME** total user time spent on the process
**CPU_STIME** total system time spent on the process
**START_TIME** when this process began, in unix time
============== ===========
"""
import base64
import os
import platform
import socket
import sys
import time
import stem.util.enum
from stem.util import log
try:
# added in python 3.2
from functools import lru_cache
except ImportError:
from stem.util.lru_cache import lru_cache
# os.sysconf is only defined on unix
try:
CLOCK_TICKS = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
except AttributeError:
CLOCK_TICKS = None
Stat = stem.util.enum.Enum(
('COMMAND', 'command'), ('CPU_UTIME', 'utime'),
('CPU_STIME', 'stime'), ('START_TIME', 'start time')
)
@lru_cache()
def is_available():
"""
Checks if proc information is available on this platform.
:returns: **True** if proc contents exist on this platform, **False** otherwise
"""
if platform.system() != 'Linux':
return False
else:
# list of process independent proc paths we use
proc_paths = ('/proc/stat', '/proc/meminfo', '/proc/net/tcp', '/proc/net/udp')
for path in proc_paths:
if not os.path.exists(path):
return False
return True
@lru_cache()
def system_start_time():
"""
Provides the unix time (seconds since epoch) when the system started.
:returns: **float** for the unix time of when the system started
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), 'system start time'
btime_line = _get_line('/proc/stat', 'btime', parameter)
try:
result = float(btime_line.strip().split()[1])
_log_runtime(parameter, '/proc/stat[btime]', start_time)
return result
except:
exc = IOError('unable to parse the /proc/stat btime entry: %s' % btime_line)
_log_failure(parameter, exc)
raise exc
@lru_cache()
def physical_memory():
"""
Provides the total physical memory on the system in bytes.
:returns: **int** for the bytes of physical memory this system has
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), 'system physical memory'
mem_total_line = _get_line('/proc/meminfo', 'MemTotal:', parameter)
try:
result = int(mem_total_line.split()[1]) * 1024
_log_runtime(parameter, '/proc/meminfo[MemTotal]', start_time)
return result
except:
exc = IOError('unable to parse the /proc/meminfo MemTotal entry: %s' % mem_total_line)
_log_failure(parameter, exc)
raise exc
def cwd(pid):
"""
Provides the current working directory for the given process.
:param int pid: process id of the process to be queried
:returns: **str** with the path of the working directory for the process
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), 'cwd'
proc_cwd_link = '/proc/%s/cwd' % pid
if pid == 0:
cwd = ''
else:
try:
cwd = os.readlink(proc_cwd_link)
except OSError:
exc = IOError('unable to read %s' % proc_cwd_link)
_log_failure(parameter, exc)
raise exc
_log_runtime(parameter, proc_cwd_link, start_time)
return cwd
def uid(pid):
"""
Provides the user ID the given process is running under.
:param int pid: process id of the process to be queried
:returns: **int** with the user id for the owner of the process
:raises: **IOError** if it can't be determined
"""
start_time, parameter = time.time(), 'uid'
status_path = '/proc/%s/status' % pid
uid_line = _get_line(status_path, 'Uid:', parameter)
try:
result = int(uid_line.split()[1])
_log_runtime(parameter, '%s[Uid]' % status_path, start_time)
return result
except:
exc = IOError('unable to parse the %s Uid entry: %s' % (status_path, uid_line))
_log_failure(parameter, exc)
raise exc
def memory_usage(pid):
"""
Provides the memory usage in bytes for the given process.
:param int pid: process id of the process to be queried
:returns: **tuple** of two ints with the memory usage of the process, of the
form **(resident_size, virtual_size)**
:raises: **IOError** if it can't be determined
"""
# checks if this is the kernel process
if pid == 0:
return (0, 0)
start_time, parameter = time.time(), 'memory usage'
status_path = '/proc/%s/status' % pid
mem_lines = _get_lines(status_path, ('VmRSS:', 'VmSize:'), parameter)
try:
resident_size = int(mem_lines['VmRSS:'].split()[1]) * 1024
virtual_size = int(mem_lines['VmSize:'].split()[1]) * 1024
_log_runtime(parameter, '%s[VmRSS|VmSize]' % status_path, start_time)
return (resident_size, virtual_size)
except:
exc = IOError('unable to parse the %s VmRSS and VmSize entries: %s' % (status_path, ', '.join(mem_lines)))
_log_failure(parameter, exc)
raise exc
def stats(pid, *stat_types):
"""
Provides process specific information. See the :data:`~stem.util.proc.Stat`
enum for valid options.
:param int pid: process id of the process to be queried
:param Stat stat_types: information to be provided back
:returns: **tuple** with all of the requested statistics as strings
:raises: **IOError** if it can't be determined
"""
if CLOCK_TICKS is None:
raise IOError('Unable to look up SC_CLK_TCK')
start_time, parameter = time.time(), 'process %s' % ', '.join(stat_types)
# the stat file contains a single line, of the form...
# 8438 (tor) S 8407 8438 8407 34818 8438 4202496...
stat_path = '/proc/%s/stat' % pid
stat_line = _get_line(stat_path, str(pid), parameter)
# breaks line into component values
stat_comp = []
cmd_start, cmd_end = stat_line.find('('), stat_line.find(')')
if cmd_start != -1 and cmd_end != -1:
stat_comp.append(stat_line[:cmd_start])
stat_comp.append(stat_line[cmd_start + 1:cmd_end])
stat_comp += stat_line[cmd_end + 1:].split()
if len(stat_comp) < 44 or not _is_float(stat_comp[13], stat_comp[14], stat_comp[21]):
exc = IOError('stat file had an unexpected format: %s' % stat_path)
_log_failure(parameter, exc)
raise exc
results = []
for stat_type in stat_types:
if stat_type == Stat.COMMAND:
if pid == 0:
results.append('sched')
else:
results.append(stat_comp[1])
elif stat_type == Stat.CPU_UTIME:
if pid == 0:
results.append('0')
else:
results.append(str(float(stat_comp[13]) / CLOCK_TICKS))
elif stat_type == Stat.CPU_STIME:
if pid == 0:
results.append('0')
else:
results.append(str(float(stat_comp[14]) / CLOCK_TICKS))
elif stat_type == Stat.START_TIME:
if pid == 0:
results.append(str(system_start_time()))
else:
# According to documentation, starttime is in field 21 and the unit is
# jiffies (clock ticks). We divide it by clock ticks, then add the
# system start time to get the seconds since the epoch.
p_start_time = float(stat_comp[21]) / CLOCK_TICKS
results.append(str(p_start_time + system_start_time()))
_log_runtime(parameter, stat_path, start_time)
return tuple(results)
def file_descriptors_used(pid):
"""
Provides the number of file descriptors currently being used by a process.
.. versionadded:: 1.3.0
:param int pid: process id of the process to be queried
:returns: **int** of the number of file descriptors used
:raises: **IOError** if it can't be determined
"""
try:
pid = int(pid)
if pid < 0:
raise IOError("Process pids can't be negative: %s" % pid)
except (ValueError, TypeError):
raise IOError('Process pid was non-numeric: %s' % pid)
try:
return len(os.listdir('/proc/%i/fd' % pid))
except Exception as exc:
raise IOError('Unable to check number of file descriptors used: %s' % exc)
def connections(pid):
"""
Queries connection related information from the proc contents. This provides
similar results to netstat, lsof, sockstat, and other connection resolution
utilities (though the lookup is far quicker).
:param int pid: process id of the process to be queried
:returns: A listing of connection tuples of the form **[(local_ipAddr1,
local_port1, foreign_ipAddr1, foreign_port1, protocol), ...]** (addresses
and protocols are strings and ports are ints)
:raises: **IOError** if it can't be determined
"""
try:
pid = int(pid)
if pid < 0:
raise IOError("Process pids can't be negative: %s" % pid)
except (ValueError, TypeError):
raise IOError('Process pid was non-numeric: %s' % pid)
if pid == 0:
return []
# fetches the inode numbers for socket file descriptors
start_time, parameter = time.time(), 'process connections'
inodes = []
for fd in os.listdir('/proc/%s/fd' % pid):
fd_path = '/proc/%s/fd/%s' % (pid, fd)
try:
# File descriptor link, such as 'socket:[30899]'
fd_name = os.readlink(fd_path)
if fd_name.startswith('socket:['):
inodes.append(fd_name[8:-1])
except OSError as exc:
if not os.path.exists(fd_path):
continue # descriptors may shift while we're in the middle of iterating over them
# most likely couldn't be read due to permissions
exc = IOError('unable to determine file descriptor destination (%s): %s' % (exc, fd_path))
_log_failure(parameter, exc)
raise exc
if not inodes:
# unable to fetch any connections for this process
return []
# check for the connection information from the /proc/net contents
conn = []
for proc_file_path in ('/proc/net/tcp', '/proc/net/udp'):
try:
proc_file = open(proc_file_path)
proc_file.readline() # skip the first line
for line in proc_file:
_, l_addr, f_addr, status, _, _, _, _, _, inode = line.split()[:10]
if inode in inodes:
# if a tcp connection, skip if it isn't yet established
if proc_file_path.endswith('/tcp') and status != '01':
continue
local_ip, local_port = _decode_proc_address_encoding(l_addr)
foreign_ip, foreign_port = _decode_proc_address_encoding(f_addr)
protocol = proc_file_path[10:]
conn.append((local_ip, local_port, foreign_ip, foreign_port, protocol))
proc_file.close()
except IOError as exc:
exc = IOError("unable to read '%s': %s" % (proc_file_path, exc))
_log_failure(parameter, exc)
raise exc
except Exception as exc:
exc = IOError("unable to parse '%s': %s" % (proc_file_path, exc))
_log_failure(parameter, exc)
raise exc
_log_runtime(parameter, '/proc/net/[tcp|udp]', start_time)
return conn
def _decode_proc_address_encoding(addr):
"""
Translates an address entry in the /proc/net/* contents to a human readable
form (`reference <http://linuxdevcenter.com/pub/a/linux/2000/11/16/LinuxAdmin.html>`_),
for instance:
::
"0500000A:0016" -> ("10.0.0.5", 22)
:param str addr: proc address entry to be decoded
:returns: **tuple** of the form **(addr, port)**, with addr as a string and port an int
"""
ip, port = addr.split(':')
# the port is represented as a two-byte hexadecimal number
port = int(port, 16)
if sys.version_info >= (3,):
ip = ip.encode('ascii')
# The IPv4 address portion is a little-endian four-byte hexadecimal number.
# That is, the least significant byte is listed first, so we need to reverse
# the order of the bytes to convert it to an IP address.
#
# This needs to account for the endian ordering as per...
# http://code.google.com/p/psutil/issues/detail?id=201
# https://trac.torproject.org/projects/tor/ticket/4777
if sys.byteorder == 'little':
ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip)[::-1])
else:
ip = socket.inet_ntop(socket.AF_INET, base64.b16decode(ip))
return (ip, port)
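# Working the docstring's example by hand: the port '0016' is hexadecimal for
# 22, and on a little-endian host '0500000A' reverses to the bytes
# 0A.00.00.05, ie the address 10.0.0.5...
#
# >>> _decode_proc_address_encoding('0500000A:0016')
# ('10.0.0.5', 22)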
def _is_float(*value):
try:
for v in value:
float(v)
return True
except ValueError:
return False
def _get_line(file_path, line_prefix, parameter):
return _get_lines(file_path, (line_prefix, ), parameter)[line_prefix]
def _get_lines(file_path, line_prefixes, parameter):
"""
Fetches lines with the given prefixes from a file. This only provides back
the first instance of each prefix.
:param str file_path: path of the file to read
:param tuple line_prefixes: string prefixes of the lines to return
:param str parameter: description of the proc attribute being fetched
:returns: mapping of prefixes to the matching line
:raises: **IOError** if unable to read the file or can't find all of the prefixes
"""
try:
remaining_prefixes = list(line_prefixes)
proc_file, results = open(file_path), {}
for line in proc_file:
if not remaining_prefixes:
break # found everything we're looking for
for prefix in remaining_prefixes:
if line.startswith(prefix):
results[prefix] = line
remaining_prefixes.remove(prefix)
break
proc_file.close()
if remaining_prefixes:
if len(remaining_prefixes) == 1:
msg = '%s did not contain a %s entry' % (file_path, remaining_prefixes[0])
else:
msg = '%s did not contain %s entries' % (file_path, ', '.join(remaining_prefixes))
raise IOError(msg)
else:
return results
except IOError as exc:
_log_failure(parameter, exc)
raise exc
def _log_runtime(parameter, proc_location, start_time):
"""
Logs a message indicating a successful proc query.
:param str parameter: description of the proc attribute being fetched
:param str proc_location: proc files we were querying
:param int start_time: unix time for when this query was started
"""
runtime = time.time() - start_time
log.debug('proc call (%s): %s (runtime: %0.4f)' % (parameter, proc_location, runtime))
def _log_failure(parameter, exc):
"""
Logs a message indicating that the proc query failed.
:param str parameter: description of the proc attribute being fetched
:param Exception exc: exception that we're raising
"""
log.debug('proc call failed (%s): %s' % (parameter, exc))
# TODO: drop with stem 2.x
# We renamed our methods to drop a redundant 'get_*' prefix, so alias the old
# names for backward compatibility.
get_system_start_time = system_start_time
get_physical_memory = physical_memory
get_cwd = cwd
get_uid = uid
get_memory_usage = memory_usage
get_stats = stats
get_connections = connections
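# A hedged usage sketch for this module (Linux only, and 1234 below is a
# hypothetical stand-in for the pid of a process you can read, such as a
# running tor instance)...

import stem.util.proc

if stem.util.proc.is_available():
  pid = 1234  # hypothetical process id

  print('cwd: %s' % stem.util.proc.cwd(pid))
  print('uid: %i' % stem.util.proc.uid(pid))

  resident_size, virtual_size = stem.util.proc.memory_usage(pid)
  print('memory: %i bytes resident, %i bytes virtual' % (resident_size, virtual_size))

  for local_ip, local_port, remote_ip, remote_port, protocol in stem.util.proc.connections(pid):
    print('%s connection: %s:%i => %s:%i' % (protocol, local_ip, local_port, remote_ip, remote_port))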

View file

@ -0,0 +1,558 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Toolkit for various string activities.
.. versionchanged:: 1.3.0
Dropped the get_* prefix from several function names. The old names still
work, but are deprecated aliases.
**Module Overview:**
::
crop - shortens string to a given length
size_label - human readable label for a number of bytes
time_label - human readable label for a number of seconds
time_labels - human readable labels for each time unit
short_time_label - condensed time label output
parse_short_time_label - seconds represented by a short time label
"""
import codecs
import datetime
import re
import sys
import stem.prereq
import stem.util.enum
from stem import str_type
# label conversion tuples of the form...
# (bits / bytes / seconds, short label, long label)
SIZE_UNITS_BITS = (
(140737488355328.0, ' Pb', ' Petabit'),
(137438953472.0, ' Tb', ' Terabit'),
(134217728.0, ' Gb', ' Gigabit'),
(131072.0, ' Mb', ' Megabit'),
(128.0, ' Kb', ' Kilobit'),
(0.125, ' b', ' Bit'),
)
SIZE_UNITS_BYTES = (
(1125899906842624.0, ' PB', ' Petabyte'),
(1099511627776.0, ' TB', ' Terabyte'),
(1073741824.0, ' GB', ' Gigabyte'),
(1048576.0, ' MB', ' Megabyte'),
(1024.0, ' KB', ' Kilobyte'),
(1.0, ' B', ' Byte'),
)
TIME_UNITS = (
(86400.0, 'd', ' day'),
(3600.0, 'h', ' hour'),
(60.0, 'm', ' minute'),
(1.0, 's', ' second'),
)
_timestamp_re = re.compile(r'(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})')
if stem.prereq.is_python_3():
def _to_bytes_impl(msg):
if isinstance(msg, str):
return codecs.latin_1_encode(msg, 'replace')[0]
else:
return msg
def _to_unicode_impl(msg):
if msg is not None and not isinstance(msg, str):
return msg.decode('utf-8', 'replace')
else:
return msg
else:
def _to_bytes_impl(msg):
if msg is not None and isinstance(msg, str_type):
return codecs.latin_1_encode(msg, 'replace')[0]
else:
return msg
def _to_unicode_impl(msg):
if msg is not None and not isinstance(msg, str_type):
return msg.decode('utf-8', 'replace')
else:
return msg
def _to_bytes(msg):
"""
Provides the ASCII bytes for the given string. This is purely to provide
python 3 compatibility, normalizing the unicode/ASCII change in the version
bump. For an explanation of this see...
http://python3porting.com/problems.html#nicer-solutions
:param str,unicode msg: string to be converted
:returns: ASCII bytes for string
"""
return _to_bytes_impl(msg)
def _to_unicode(msg):
"""
Provides the unicode string for the given ASCII bytes. This is purely to
provide python 3 compatibility, normalizing the unicode/ASCII change in the
version bump.
:param str,unicode msg: string to be converted
:returns: unicode conversion
"""
return _to_unicode_impl(msg)
def _to_camel_case(label, divider = '_', joiner = ' '):
"""
Converts the given string to camel case. For instance:
::
>>> _to_camel_case('I_LIKE_PEPPERJACK!')
'I Like Pepperjack!'
:param str label: input string to be converted
:param str divider: word boundary
:param str joiner: replacement for word boundaries
:returns: camel cased string
"""
words = []
for entry in label.split(divider):
if len(entry) == 0:
words.append('')
elif len(entry) == 1:
words.append(entry.upper())
else:
words.append(entry[0].upper() + entry[1:].lower())
return joiner.join(words)
# This needs to be defined after _to_camel_case() to avoid a circular
# dependency with the enum module.
Ending = stem.util.enum.Enum('ELLIPSE', 'HYPHEN')
def crop(msg, size, min_word_length = 4, min_crop = 0, ending = Ending.ELLIPSE, get_remainder = False):
"""
Shortens a string to a given length.
If we crop content then a given ending is included (counting itself toward
the size limitation). This crops on word breaks so we only include a word if
we can display at least **min_word_length** characters of it.
If there isn't room for even a truncated single word (or one word plus the
ellipsis if including those) then this provides an empty string.
If a cropped string ends with a comma or period then it's stripped (unless
we're providing the remainder back). For example...
>>> crop('This is a looooong message', 17)
'This is a looo...'
>>> crop('This is a looooong message', 12)
'This is a...'
>>> crop('This is a looooong message', 3)
''
The whole point of this method is to provide human friendly croppings, and as
such details of how this works might change in the future. Callers should not
rely on the details of how this crops.
.. versionadded:: 1.3.0
:param str msg: text to be processed
:param int size: space available for text
:param int min_word_length: minimum characters before which a word is
dropped, requires whole word if **None**
:param int min_crop: minimum characters that must be dropped if a word is
cropped
:param Ending ending: type of ending used when truncating, no special
truncation is used if **None**
:param bool get_remainder: returns a tuple with the second part being the
cropped portion of the message
:returns: **str** of the text truncated to the given length
"""
# checks if there's room for the whole message
if len(msg) <= size:
return (msg, '') if get_remainder else msg
if size < 0:
raise ValueError("Crop size can't be negative (received %i)" % size)
elif min_word_length and min_word_length < 0:
raise ValueError("Crop's min_word_length can't be negative (received %i)" % min_word_length)
elif min_crop < 0:
raise ValueError("Crop's min_crop can't be negative (received %i)" % min_crop)
# since we're cropping, the effective space available is less with an
# ellipsis, and cropping words requires an extra space for hyphens
if ending == Ending.ELLIPSE:
size -= 3
elif min_word_length and ending == Ending.HYPHEN:
min_word_length += 1
if min_word_length is None:
min_word_length = sys.maxsize
# checks if there isn't the minimum space needed to include anything
last_wordbreak = msg.rfind(' ', 0, size + 1)
if last_wordbreak == -1:
# we're splitting the first word
if size < min_word_length:
return ('', msg) if get_remainder else ''
include_crop = True
else:
last_wordbreak = len(msg[:last_wordbreak].rstrip()) # drops extra ending whitespaces
include_crop = size - last_wordbreak - 1 >= min_word_length
# if there's a minimum crop size then make sure we're cropping at least that many characters
if include_crop and min_crop:
next_wordbreak = msg.find(' ', size)
if next_wordbreak == -1:
next_wordbreak = len(msg)
include_crop = next_wordbreak - size + 1 >= min_crop
if include_crop:
return_msg, remainder = msg[:size], msg[size:]
if ending == Ending.HYPHEN:
remainder = return_msg[-1] + remainder
return_msg = return_msg[:-1].rstrip() + '-'
else:
return_msg, remainder = msg[:last_wordbreak], msg[last_wordbreak:]
# if this is ending with a comma or period then strip it off
if not get_remainder and return_msg and return_msg[-1] in (',', '.'):
return_msg = return_msg[:-1]
if ending == Ending.ELLIPSE:
return_msg = return_msg.rstrip() + '...'
return (return_msg, remainder) if get_remainder else return_msg
def size_label(byte_count, decimal = 0, is_long = False, is_bytes = True):
"""
Converts a number of bytes into a human readable label in its most
significant units. For instance, 7500 bytes would return "7 KB". If the
is_long option is used this expands unit labels to be the properly pluralized
full word (for instance 'Kilobytes' rather than 'KB'). Units go up through
petabytes.
::
>>> size_label(2000000)
'1 MB'
>>> size_label(1050, 2)
'1.02 KB'
>>> size_label(1050, 3, True)
'1.025 Kilobytes'
:param int byte_count: number of bytes to be converted
:param int decimal: number of decimal digits to be included
:param bool is_long: expands units label
:param bool is_bytes: provides units in bytes if **True**, bits otherwise
:returns: **str** with human readable representation of the size
"""
if is_bytes:
return _get_label(SIZE_UNITS_BYTES, byte_count, decimal, is_long)
else:
return _get_label(SIZE_UNITS_BITS, byte_count, decimal, is_long)
def time_label(seconds, decimal = 0, is_long = False):
"""
Converts seconds into a time label truncated to its most significant units.
For instance, 7500 seconds would return "2h". Units go up through days.
This defaults to presenting single character labels, but if the is_long
option is used this expands labels to be the full word (space included and
properly pluralized). For instance, "4h" would be "4 hours" and "1m" would
become "1 minute".
::
>>> time_label(10000)
'2h'
>>> time_label(61, 1, True)
'1.0 minute'
>>> time_label(61, 2, True)
'1.01 minutes'
:param int seconds: number of seconds to be converted
:param int decimal: number of decimal digits to be included
:param bool is_long: expands units label
:returns: **str** with human readable representation of the time
"""
return _get_label(TIME_UNITS, seconds, decimal, is_long)
def time_labels(seconds, is_long = False):
"""
Provides a list of label conversions for each time unit, starting with its
most significant units on down. Any counts that evaluate to zero are omitted.
For example...
::
>>> time_labels(400)
['6m', '40s']
>>> time_labels(3640, True)
['1 hour', '40 seconds']
:param int seconds: number of seconds to be converted
:param bool is_long: expands units label
:returns: **list** of strings with human readable representations of the time
"""
time_labels = []
for count_per_unit, _, _ in TIME_UNITS:
if abs(seconds) >= count_per_unit:
time_labels.append(_get_label(TIME_UNITS, seconds, 0, is_long))
seconds %= count_per_unit
return time_labels
def short_time_label(seconds):
"""
Provides a time in the following format:
[[dd-]hh:]mm:ss
::
>>> short_time_label(111)
'01:51'
>>> short_time_label(544100)
'6-07:08:20'
:param int seconds: number of seconds to be converted
:returns: **str** with the short representation for the time
:raises: **ValueError** if the input is negative
"""
if seconds < 0:
raise ValueError("Input needs to be a non-negative integer, got '%i'" % seconds)
time_comp = {}
for amount, _, label in TIME_UNITS:
count = int(seconds / amount)
seconds %= amount
time_comp[label.strip()] = count
label = '%02i:%02i' % (time_comp['minute'], time_comp['second'])
if time_comp['day']:
label = '%i-%02i:%s' % (time_comp['day'], time_comp['hour'], label)
elif time_comp['hour']:
label = '%02i:%s' % (time_comp['hour'], label)
return label
def parse_short_time_label(label):
"""
Provides the number of seconds corresponding to the formatting used for the
cputime and etime fields of ps:
[[dd-]hh:]mm:ss or mm:ss.ss
::
>>> parse_short_time_label('01:51')
111
>>> parse_short_time_label('6-07:08:20')
544100
:param str label: time entry to be parsed
:returns: **int** with the number of seconds represented by the label
:raises: **ValueError** if input is malformed
"""
days, hours, minutes, seconds = '0', '0', '0', '0'
if '-' in label:
days, label = label.split('-', 1)
time_comp = label.split(':')
if len(time_comp) == 3:
hours, minutes, seconds = time_comp
elif len(time_comp) == 2:
minutes, seconds = time_comp
else:
raise ValueError("Invalid time format, we expected '[[dd-]hh:]mm:ss' or 'mm:ss.ss': %s" % label)
try:
time_sum = int(float(seconds))
time_sum += int(minutes) * 60
time_sum += int(hours) * 3600
time_sum += int(days) * 86400
return time_sum
except ValueError:
raise ValueError('Non-numeric value in time entry: %s' % label)
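# These two helpers round-trip with each other, for instance...
#
# >>> parse_short_time_label(short_time_label(544100))
# 544100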
def _parse_timestamp(entry):
"""
Parses a date and time in a format like...
::
2012-11-08 16:48:41
:param str entry: timestamp to be parsed
:returns: **datetime** for the time represented by the timestamp
:raises: **ValueError** if the timestamp is malformed
"""
if not isinstance(entry, (str, str_type)):
raise ValueError('parse_timestamp() input must be a str, got a %s' % type(entry))
try:
time = [int(x) for x in _timestamp_re.match(entry).groups()]
except AttributeError:
raise ValueError('Expected timestamp in format YYYY-MM-DD HH:MM:ss but got ' + entry)
return datetime.datetime(time[0], time[1], time[2], time[3], time[4], time[5])
def _parse_iso_timestamp(entry):
"""
Parses the ISO 8601 standard that provides for timestamps like...
::
2012-11-08T16:48:41.420251
:param str entry: timestamp to be parsed
:returns: **datetime** for the time represented by the timestamp
:raises: **ValueError** if the timestamp is malformed
"""
if not isinstance(entry, (str, str_type)):
raise ValueError('parse_iso_timestamp() input must be a str, got a %s' % type(entry))
# based after suggestions from...
# http://stackoverflow.com/questions/127803/how-to-parse-iso-formatted-date-in-python
if '.' in entry:
timestamp_str, microseconds = entry.split('.')
else:
timestamp_str, microseconds = entry, '000000'
if len(microseconds) != 6 or not microseconds.isdigit():
raise ValueError("timestamp's microseconds should be six digits")
if timestamp_str[10] == 'T':
timestamp_str = timestamp_str[:10] + ' ' + timestamp_str[11:]
else:
raise ValueError("timestamp didn't contain delimeter 'T' between date and time")
timestamp = _parse_timestamp(timestamp_str)
return timestamp + datetime.timedelta(microseconds = int(microseconds))
def _get_label(units, count, decimal, is_long):
"""
Provides a label corresponding to units of the highest significance in the
provided set. This rounds down (i.e., integer truncation after visible units).
:param tuple units: type of units to be used for conversion, containing
(count_per_unit, short_label, long_label)
:param int count: number of base units being converted
:param int decimal: decimal precision of label
:param bool is_long: uses the long label if **True**, short label otherwise
"""
# formatted string for the requested number of digits
label_format = '%%.%if' % decimal
if count < 0:
label_format = '-' + label_format
count = abs(count)
elif count == 0:
units_label = units[-1][2] + 's' if is_long else units[-1][1]
return '%s%s' % (label_format % count, units_label)
for count_per_unit, short_label, long_label in units:
if count >= count_per_unit:
# Rounding down with a '%f' is a little clunky. Reducing the count so
# it'll divide evenly as the rounded down value.
count -= count % (count_per_unit / (10 ** decimal))
count_label = label_format % (count / count_per_unit)
if is_long:
# Pluralize if any of the visible units make it greater than one. For
# instance 1.0003 is plural but 1.000 isn't.
if decimal > 0:
is_plural = count > count_per_unit
else:
is_plural = count >= count_per_unit * 2
return count_label + long_label + ('s' if is_plural else '')
else:
return count_label + short_label
# TODO: drop with stem 2.x
# We renamed our methods to drop a redundant 'get_*' prefix, so alias the old
# names for backward compatibility.
get_size_label = size_label
get_time_label = time_label
get_time_labels = time_labels
get_short_time_label = short_time_label
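# A short usage sketch for the helpers above, with outputs taken from their
# docstring examples...

from stem.util import str_tools

print(str_tools.size_label(2000000))       # 1 MB
print(str_tools.size_label(1050, 2))       # 1.02 KB
print(str_tools.time_label(10000))         # 2h
print(str_tools.time_labels(400))          # ['6m', '40s']
print(str_tools.short_time_label(544100))  # 6-07:08:20
print(str_tools.crop('This is a looooong message', 17))  # This is a looo...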

File diff suppressed because it is too large

View file

@ -0,0 +1,116 @@
# Copyright 2011-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Utilities for working with the terminal.
**Module Overview:**
::
format - wrap text with ANSI for the given colors or attributes
.. data:: Color (enum)
.. data:: BgColor (enum)
Enumerations for foreground or background terminal color.
=========== ===========
Color Description
=========== ===========
**BLACK** black color
**BLUE** blue color
**CYAN** cyan color
**GREEN** green color
**MAGENTA** magenta color
**RED** red color
**WHITE** white color
**YELLOW** yellow color
=========== ===========
.. data:: Attr (enum)
Enumerations of terminal text attributes.
=================== ===========
Attr Description
=================== ===========
**BOLD** heavy typeface
**HILIGHT** inverted foreground and background
**UNDERLINE** underlined text
**READLINE_ESCAPE** wrap encodings in `RL_PROMPT_START_IGNORE and RL_PROMPT_END_IGNORE sequences <https://stackoverflow.com/questions/9468435/look-how-to-fix-column-calculation-in-python-readline-if-use-color-prompt>`_
=================== ===========
"""
import stem.util.enum
import stem.util.str_tools
TERM_COLORS = ('BLACK', 'RED', 'GREEN', 'YELLOW', 'BLUE', 'MAGENTA', 'CYAN', 'WHITE')
# DISABLE_COLOR_SUPPORT is *not* being vended to Stem users. This is likely to
# go away if I can think of a more graceful method for color toggling.
DISABLE_COLOR_SUPPORT = False
Color = stem.util.enum.Enum(*TERM_COLORS)
BgColor = stem.util.enum.Enum(*['BG_' + color for color in TERM_COLORS])
Attr = stem.util.enum.Enum('BOLD', 'UNDERLINE', 'HILIGHT', 'READLINE_ESCAPE')
# mappings of terminal attribute enums to their ANSI escape encoding
FG_ENCODING = dict([(list(Color)[i], str(30 + i)) for i in range(8)])
BG_ENCODING = dict([(list(BgColor)[i], str(40 + i)) for i in range(8)])
ATTR_ENCODING = {Attr.BOLD: '1', Attr.UNDERLINE: '4', Attr.HILIGHT: '7'}
CSI = '\x1B[%sm'
RESET = CSI % '0'
def format(msg, *attr):
"""
Simple terminal text formatting using `ANSI escape sequences
<https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_codes>`_.
The following are some toolkits providing similar capabilities:
* `django.utils.termcolors <https://github.com/django/django/blob/master/django/utils/termcolors.py>`_
* `termcolor <https://pypi.python.org/pypi/termcolor>`_
* `colorama <https://pypi.python.org/pypi/colorama>`_
:param str msg: string to be formatted
:param str attr: text attributes, this can be :data:`~stem.util.term.Color`,
:data:`~stem.util.term.BgColor`, or :data:`~stem.util.term.Attr` enums
and are case insensitive (so strings like 'red' are fine)
:returns: **str** wrapped with ANSI escape encodings, starting with the given
attributes and ending with a reset
"""
if DISABLE_COLOR_SUPPORT:
return msg
# if we have reset sequences in the message then apply our attributes
# after each of them
if RESET in msg:
return ''.join([format(comp, *attr) for comp in msg.split(RESET)])
encodings = []
for text_attr in attr:
text_attr, encoding = stem.util.str_tools._to_camel_case(text_attr), None
encoding = FG_ENCODING.get(text_attr, encoding)
encoding = BG_ENCODING.get(text_attr, encoding)
encoding = ATTR_ENCODING.get(text_attr, encoding)
if encoding:
encodings.append(encoding)
if encodings:
prefix, suffix = CSI % ';'.join(encodings), RESET
if Attr.READLINE_ESCAPE in attr:
prefix = '\001%s\002' % prefix
suffix = '\001%s\002' % suffix
return prefix + msg + suffix
else:
return msg
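# A usage sketch for format(). These simply wrap the message in ANSI escape
# sequences, so the color only renders on a capable terminal...

from stem.util import term

print(term.format('error', term.Color.RED, term.Attr.BOLD))
print(term.format('notice', term.Color.WHITE, term.BgColor.BG_BLUE))
print(term.format('hello', 'green'))  # attribute lookups are case insensitive, so plain strings work too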

View file

@ -0,0 +1,341 @@
# Copyright 2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Helper functions for testing.
.. versionadded:: 1.2.0
::
clean_orphaned_pyc - delete *.pyc files without corresponding *.py
is_pyflakes_available - checks if pyflakes is available
is_pep8_available - checks if pep8 is available
stylistic_issues - checks for PEP8 and other stylistic issues
pyflakes_issues - static checks for problems via pyflakes
"""
import collections
import linecache
import os
import re
import stem.util.conf
import stem.util.system
CONFIG = stem.util.conf.config_dict('test', {
'pep8.ignore': [],
'pyflakes.ignore': [],
'exclude_paths': [],
})
Issue = collections.namedtuple('Issue', [
'line_number',
'message',
'line',
])
def clean_orphaned_pyc(paths):
"""
Deletes any file with a *.pyc extension without a corresponding *.py. This
helps to address a common gotcha when deleting python files...
* You delete module 'foo.py' and run the tests to ensure that you haven't
broken anything. They pass, however there *are* still some 'import foo'
statements that still work because the bytecode (foo.pyc) is still around.
* You push your change.
* Another developer clones our repository and is confused because we have a
bunch of ImportErrors.
:param list paths: paths to search for orphaned pyc files
:returns: list of absolute paths that were deleted
"""
orphaned_pyc = []
for path in paths:
for pyc_path in stem.util.system.files_with_suffix(path, '.pyc'):
py_path = pyc_path[:-1]
# If we're running python 3 then the *.pyc files are no longer bundled
# with the *.py. Rather, they're in a __pycache__ directory.
pycache = '%s__pycache__%s' % (os.path.sep, os.path.sep)
if pycache in pyc_path:
directory, pycache_filename = pyc_path.split(pycache, 1)
if not pycache_filename.endswith('.pyc'):
continue # should look like 'test_tools.cpython-32.pyc'
py_path = os.path.join(directory, pycache_filename.split('.')[0] + '.py')
if not os.path.exists(py_path):
orphaned_pyc.append(pyc_path)
os.remove(pyc_path)
return orphaned_pyc
def is_pyflakes_available():
"""
Checks if pyflakes is available.
:returns: **True** if we can use pyflakes and **False** otherwise
"""
try:
import pyflakes.api
import pyflakes.reporter
return True
except ImportError:
return False
def is_pep8_available():
"""
Checks if pep8 is available.
:returns: **True** if we can use pep8 and **False** otherwise
"""
try:
import pep8
if not hasattr(pep8, 'BaseReport'):
raise ImportError()
return True
except ImportError:
return False
def stylistic_issues(paths, check_two_space_indents = False, check_newlines = False, check_trailing_whitespace = False, check_exception_keyword = False, prefer_single_quotes = False):
"""
Checks for stylistic issues that are an issue according to the parts of PEP8
we conform to. You can suppress PEP8 issues by making a 'test' configuration
that sets 'pep8.ignore'.
For example, with a 'test/settings.cfg' of...
::
# PEP8 compliance issues that we're ignoring...
#
# * E111 and E121 four space indentations
# * E501 line is over 79 characters
pep8.ignore E111
pep8.ignore E121
pep8.ignore E501
... you can then run tests with...
::
import stem.util.conf
test_config = stem.util.conf.get_config('test')
test_config.load('test/settings.cfg')
issues = stylistic_issues('my_project')
If an 'exclude_paths' was set in our test config then we exclude any absolute
paths matching those regexes.
.. versionchanged:: 1.3.0
Renamed from get_stylistic_issues() to stylistic_issues(). The old name
still works as an alias, but will be dropped in Stem version 2.0.0.
.. versionchanged:: 1.4.0
Changing tuples in return value to be namedtuple instances, and adding the
line that had the issue.
.. versionchanged:: 1.4.0
Added the prefer_single_quotes option.
:param list paths: paths to search for stylistic issues
:param bool check_two_space_indents: check for two space indentations and
that no tabs snuck in
:param bool check_newlines: check that we have standard newlines (\\n), not
windows (\\r\\n) nor classic mac (\\r)
:param bool check_trailing_whitespace: check that our lines don't end with
trailing whitespace
:param bool check_exception_keyword: checks that we're using 'as' for
exceptions rather than a comma
:param bool prefer_single_quotes: standardize on using single rather than
double quotes for strings, when reasonable
:returns: **dict** of the form ``path => [(line_number, message)...]``
"""
issues = {}
if is_pep8_available():
import pep8
class StyleReport(pep8.BaseReport):
def __init__(self, options):
super(StyleReport, self).__init__(options)
def error(self, line_number, offset, text, check):
code = super(StyleReport, self).error(line_number, offset, text, check)
if code:
issues.setdefault(self.filename, []).append(Issue(line_number, '%s %s' % (code, text), text))
style_checker = pep8.StyleGuide(ignore = CONFIG['pep8.ignore'], reporter = StyleReport)
style_checker.check_files(list(_python_files(paths)))
if check_two_space_indents or check_newlines or check_trailing_whitespace or check_exception_keyword:
for path in _python_files(paths):
with open(path) as f:
file_contents = f.read()
lines = file_contents.split('\n')
is_block_comment = False
for index, line in enumerate(lines):
whitespace, content = re.match(r'^(\s*)(.*)$', line).groups()
# TODO: This does not check that block indentations are two spaces
# because differentiating source from string blocks ("""foo""") is more
# of a pita than I want to deal with right now.
if '"""' in content:
is_block_comment = not is_block_comment
if check_two_space_indents and '\t' in whitespace:
issues.setdefault(path, []).append(Issue(index + 1, 'indentation has a tab', line))
elif check_newlines and '\r' in content:
issues.setdefault(path, []).append(Issue(index + 1, 'contains a windows newline', line))
elif check_trailing_whitespace and content != content.rstrip():
issues.setdefault(path, []).append(Issue(index + 1, 'line has trailing whitespace', line))
elif check_exception_keyword and content.lstrip().startswith('except') and content.endswith(', exc:'):
# Python 2.6 - 2.7 supports two forms for exceptions...
#
# except ValueError, exc:
# except ValueError as exc:
#
# The former is the old method and no longer supported in python 3
# going forward.
# TODO: This check only works if the exception variable is called
# 'exc'. We should generalize this via a regex so other names work
# too.
issues.setdefault(path, []).append(Issue(index + 1, "except clause should use 'as', not comma", line))
if prefer_single_quotes and line and not is_block_comment:
content = line.strip().split('#', 1)[0]
if '"' in content and "'" not in content and '"""' not in content and not content.endswith('\\'):
# Checking if the line already has any single quotes since that
# usually means double quotes are preferable for the content (for
# instance "I'm hungry"). Also checking for '\' at the end since
# that can indicate a multi-line string.
issues.setdefault(path, []).append(Issue(index + 1, 'use single rather than double quotes', line))
return issues
def pyflakes_issues(paths):
"""
Performs static checks via pyflakes. False positives can be ignored via
'pyflakes.ignore' entries in our 'test' config. For instance...
::
pyflakes.ignore stem/util/test_tools.py => 'pyflakes' imported but unused
pyflakes.ignore stem/util/test_tools.py => 'pep8' imported but unused
If an 'exclude_paths' was set in our test config then we exclude any absolute
paths matching those regexes.
.. versionchanged:: 1.3.0
Renamed from get_pyflakes_issues() to pyflakes_issues(). The old name
still works as an alias, but will be dropped in Stem version 2.0.0.
.. versionchanged:: 1.4.0
Changing tuples in return value to be namedtuple instances, and adding the
line that had the issue.
:param list paths: paths to search for problems
:returns: dict of the form ``path => [(line_number, message)...]``
"""
issues = {}
if is_pyflakes_available():
import pyflakes.api
import pyflakes.reporter
class Reporter(pyflakes.reporter.Reporter):
def __init__(self):
self._ignored_issues = {}
for line in CONFIG['pyflakes.ignore']:
path, issue = line.split('=>')
self._ignored_issues.setdefault(path.strip(), []).append(issue.strip())
def unexpectedError(self, filename, msg):
self._register_issue(filename, None, msg, None)
def syntaxError(self, filename, msg, lineno, offset, text):
self._register_issue(filename, lineno, msg, text)
def flake(self, msg):
self._register_issue(msg.filename, msg.lineno, msg.message % msg.message_args, None)
def _is_ignored(self, path, issue):
# Paths in 'pyflakes.ignore' are relative, so we need to check to see if our
# path ends with any of them.
for ignored_path, ignored_issues in self._ignored_issues.items():
if path.endswith(ignored_path) and issue in ignored_issues:
return True
return False
def _register_issue(self, path, line_number, issue, line):
if not self._is_ignored(path, issue):
if path and line_number and not line:
line = linecache.getline(path, line_number)
issues.setdefault(path, []).append(Issue(line_number, issue, line))
reporter = Reporter()
for path in _python_files(paths):
pyflakes.api.checkPath(path, reporter)
return issues
def _python_files(paths):
for path in paths:
for file_path in stem.util.system.files_with_suffix(path, '.py'):
skip = False
for exclude_path in CONFIG['exclude_paths']:
if re.match(exclude_path, file_path):
skip = True
break
if not skip:
yield file_path
# TODO: drop with stem 2.x
# We renamed our methods to drop a redundant 'get_*' prefix, so alias the old
# names for backward compatibility.
get_stylistic_issues = stylistic_issues
get_pyflakes_issues = pyflakes_issues
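# A hedged sketch tying these checks together ('test/settings.cfg' and
# 'my_project' are hypothetical paths)...

import stem.util.conf
import stem.util.test_tools

test_config = stem.util.conf.get_config('test')
test_config.load('test/settings.cfg')  # supplies pep8.ignore, pyflakes.ignore, etc

style_issues = stem.util.test_tools.stylistic_issues(['my_project'], check_newlines = True)
flake_issues = stem.util.test_tools.pyflakes_issues(['my_project'])

for results in (style_issues, flake_issues):
  for path, path_issues in results.items():
    for issue in path_issues:
      print('%s (line %s): %s' % (path, issue.line_number, issue.message))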

View file

@ -0,0 +1,151 @@
# Copyright 2012-2015, Damian Johnson and The Tor Project
# See LICENSE for licensing information
"""
Miscellaneous utility functions for working with tor.
.. versionadded:: 1.2.0
**Module Overview:**
::
is_valid_fingerprint - checks if a string is a valid tor relay fingerprint
is_valid_nickname - checks if a string is a valid tor relay nickname
is_valid_circuit_id - checks if a string is a valid tor circuit id
is_valid_stream_id - checks if a string is a valid tor stream id
is_valid_connection_id - checks if a string is a valid tor connection id
is_valid_hidden_service_address - checks if a string is a valid hidden service address
is_hex_digits - checks if a string is only made up of hex digits
"""
import re
# The control-spec defines the following as...
#
# Fingerprint = "$" 40*HEXDIG
# NicknameChar = "a"-"z" / "A"-"Z" / "0" - "9"
# Nickname = 1*19 NicknameChar
#
# CircuitID = 1*16 IDChar
# IDChar = ALPHA / DIGIT
#
# HEXDIG is defined in RFC 5234 as being uppercase and used in RFC 5987 as
# case insensitive. Tor doesn't define this in the spec so flipping a coin
# and going with case insensitive.
NICKNAME_PATTERN = re.compile('^[a-zA-Z0-9]{1,19}$')
CIRC_ID_PATTERN = re.compile('^[a-zA-Z0-9]{1,16}$')
# Hidden service addresses are sixteen base32 characters.
HS_ADDRESS_PATTERN = re.compile('^[a-z2-7]{16}$')
def is_valid_fingerprint(entry, check_prefix = False):
"""
Checks if a string is a properly formatted relay fingerprint. This checks for
a '$' prefix if check_prefix is true, otherwise this only validates the hex
digits.
:param str entry: string to be checked
:param bool check_prefix: checks for a '$' prefix
:returns: **True** if the string could be a relay fingerprint, **False** otherwise
"""
try:
if check_prefix:
if not entry or entry[0] != '$':
return False
entry = entry[1:]
return is_hex_digits(entry, 40)
except TypeError:
return False
def is_valid_nickname(entry):
"""
Checks if a string is a valid format for being a nickname.
:param str entry: string to be checked
:returns: **True** if the string could be a nickname, **False** otherwise
"""
try:
return bool(NICKNAME_PATTERN.match(entry))
except TypeError:
return False
def is_valid_circuit_id(entry):
"""
Checks if a string is a valid format for being a circuit identifier.
:returns: **True** if the string could be a circuit id, **False** otherwise
"""
try:
return bool(CIRC_ID_PATTERN.match(entry))
except TypeError:
return False
def is_valid_stream_id(entry):
"""
Checks if a string is a valid format for being a stream identifier.
Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.
:returns: **True** if the string could be a stream id, **False** otherwise
"""
return is_valid_circuit_id(entry)
def is_valid_connection_id(entry):
"""
Checks if a string is a valid format for being a connection identifier.
Currently, this is just an alias to :func:`~stem.util.tor_tools.is_valid_circuit_id`.
:returns: **True** if the string could be a connection id, **False** otherwise
"""
return is_valid_circuit_id(entry)
def is_valid_hidden_service_address(entry):
"""
Checks if a string is a valid format for being a hidden service address (not
including the '.onion' suffix).
:returns: **True** if the string could be a hidden service address, **False** otherwise
"""
try:
return bool(HS_ADDRESS_PATTERN.match(entry))
except TypeError:
return False
def is_hex_digits(entry, count):
"""
Checks if a string is the given number of hex digits. Digits represented by
letters are case insensitive.
:param str entry: string to be checked
:param int count: number of hex digits to be checked for
:returns: **True** if the given number of hex digits, **False** otherwise
"""
try:
if len(entry) != count:
return False
int(entry, 16) # attempt to convert it as hex
return True
except (ValueError, TypeError):
return False
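# A quick validation sketch using the helpers above...

from stem.util import tor_tools

print(tor_tools.is_valid_fingerprint('$A7569A83B5706AB1B1A9CB52EFF7D2D32E4553EB', True))  # True
print(tor_tools.is_valid_nickname('caerSidi'))                        # True
print(tor_tools.is_valid_nickname('this-is-not-valid'))               # False, hyphens aren't allowed
print(tor_tools.is_valid_hidden_service_address('aaaaaaaaaaaaaaaa'))  # True, sixteen base32 characters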