Switch to python3
This commit is contained in:
parent
531041e89a
commit
9ba4b6a91a
5286 changed files with 677347 additions and 576888 deletions
199
Shared/lib/python3.4/site-packages/stdnum/numdb.py
Normal file
199
Shared/lib/python3.4/site-packages/stdnum/numdb.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
# numdb.py - module for handling hierarchically organised numbers
|
||||
#
|
||||
# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
|
||||
"""Query structured number format files with number properties.
|
||||
|
||||
This module contains functions for reading and querying a database that
|
||||
stores numbers that use a hierarchical format (e.g. ISBN, IBAN, phone
|
||||
numbers, etc).
|
||||
|
||||
To read a database from a file:
|
||||
|
||||
>>> dbfile = read(open('numdb-test.dat', 'r'))
|
||||
|
||||
To split a number:
|
||||
|
||||
>>> dbfile.split('01006')
|
||||
['0', '100', '6']
|
||||
>>> dbfile.split('902006')
|
||||
['90', '20', '06']
|
||||
>>> dbfile.split('909856')
|
||||
['90', '985', '6']
|
||||
|
||||
To split the number and get properties for each part:
|
||||
|
||||
>>> dbfile.info('01006') == [
|
||||
... ('0', {'prop1': 'foo'}),
|
||||
... ('100', {'prop2': 'bar'}),
|
||||
... ('6', {}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('02006') == [
|
||||
... ('0', {'prop1': 'foo'}),
|
||||
... ('200', {'prop2': 'bar', 'prop3': 'baz'}),
|
||||
... ('6', {}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('03456') == [
|
||||
... ('0', {'prop1': 'foo'}),
|
||||
... ('345', {'prop2': 'bar', 'prop3': 'baz'}),
|
||||
... ('6', {}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('902006') == [
|
||||
... ('90', {'prop1': 'booz'}),
|
||||
... ('20', {'prop2': 'foo'}),
|
||||
... ('06', {}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('909856') == [
|
||||
... ('90', {'prop1': 'booz'}),
|
||||
... ('985', {'prop2': 'fooz'}),
|
||||
... ('6', {}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('9889') == [
|
||||
... ('98', {'prop1': 'booz'}),
|
||||
... ('89', {'prop2': 'foo'}),
|
||||
... ]
|
||||
True
|
||||
>>> dbfile.info('633322') == [
|
||||
... ('6', {'prop1': 'boo'}),
|
||||
... ('333', {'prop2': 'bar', 'prop3': 'baz'}),
|
||||
... ('22', {}),
|
||||
... ]
|
||||
True
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
from pkg_resources import resource_stream
|
||||
|
||||
# Matches one data line: leading spaces (the indent), a comma-separated list
# of values or low-high ranges, and the remainder of the line (the
# properties). Raw strings keep the \s escapes from being treated as
# (invalid) string escape sequences.
_line_re = re.compile(
    r'^(?P<indent> *)'
    r'(?P<ranges>([^-,\s]+(-[^-,\s]+)?)(,[^-,\s]+(-[^-,\s]+)?)*)'
    r'\s*(?P<props>.*)$')

# Matches a single name="value" property. The hyphen is placed last in the
# character class so it is unambiguously a literal.
_prop_re = re.compile(r'(?P<prop>[0-9a-zA-Z_-]+)="(?P<value>[^"]*)"')

# this is a cache of open databases
_open_databases = {}
|
||||
|
||||
# the prefixes attribute of NumDB is structured as follows:
|
||||
# prefixes = [
|
||||
# [ length, low, high, props, children ]
|
||||
# ...
|
||||
# ]
|
||||
# where children is a prefixes structure in its own right
|
||||
# (there is no expected ordering within the list)
|
||||
|
||||
|
||||
class NumDB(object):
    """Tree of number prefixes used to split numbers into their parts.

    The prefixes attribute is a list of
    [length, low, high, props, children] entries, where children is either
    None or another list with the same structure.
    """

    def __init__(self):
        """Create a database that does not contain any prefixes yet."""
        self.prefixes = []

    @staticmethod
    def _merge(results):
        """Merge the provided list of possible results into a single result
        list (this is a generator).

        Each result is a list of (part, props) tuples. For every position
        the shortest part found in any of the results is used (the first one
        on ties) and the properties of all results at that position are
        combined; on duplicate keys the later result wins. Nothing is
        yielded for an empty list of results.
        """
        if not results:
            # max() below would raise ValueError on an empty sequence
            return
        # expand the results to all have the same length
        longest = max(len(result) for result in results)
        padded = [result + (longest - len(result)) * [None]
                  for result in results]
        # go over each part
        for parts in zip(*padded):
            # regroup parts into parts list and properties list, skipping
            # the None padding of the shorter results
            partlist, proplist = zip(*(x for x in parts if x))
            part = min(partlist, key=len)
            props = {}
            for p in proplist:
                props.update(p)
            yield part, props

    @staticmethod
    def _find(number, prefixes):
        """Lookup the specified number in the list of prefixes, this will
        return basically what info() should return but works recursively.

        Returns a list of (part, props) tuples. An empty number results in
        an empty list; a number that matches none of the prefixes is returned
        as a single part with an empty properties dict.
        """
        if not number:
            return []
        results = []
        # prefixes is None for entries that do not have any children
        if prefixes:
            for length, low, high, props, children in prefixes:
                if len(number) >= length and low <= number[:length] <= high:
                    # the prefix matches, continue with the remainder of
                    # the number in the children of this prefix
                    results.append([(number[:length], props)] +
                                   NumDB._find(number[length:], children))
        # not-found fallback
        if not results:
            return [(number, {})]
        # merge the results into a single result
        return list(NumDB._merge(results))

    def info(self, number):
        """Split the provided number in components and associate properties
        with each component. This returns a list of tuples. Each tuple
        consists of a string (a part of the number) and a dict of properties.
        """
        return NumDB._find(number, self.prefixes)

    def split(self, number):
        """Split the provided number in components. This returns a list
        with the parts of the number that were identified."""
        return [part for part, _ in self.info(number)]
|
||||
|
||||
|
||||
def _parse(fp):
    """Generate (indent, length, low, high, props) tuples from the lines
    read from the file pointer.

    Lines that start with '#' and lines that only contain whitespace are
    skipped. Every other line is matched against _line_re and yields one
    tuple for each comma-separated value or low-high range on the line.
    The length is the number of characters of the low value and the same
    props dict is used for all ranges that come from a single line.
    """
    for line in fp:
        # ignore comments
        if line[0] == '#' or line.strip() == '':
            continue  # pragma: no cover (optimisation takes it out)
        # any other line should parse
        parsed = _line_re.search(line)
        indent = len(parsed.group('indent'))
        props = dict(_prop_re.findall(parsed.group('props')))
        for item in parsed.group('ranges').split(','):
            # a single value is treated as a range of exactly that value
            low, high = item.split('-') if '-' in item else (item, item)
            yield indent, len(low), low, high, props
|
||||
|
||||
|
||||
def read(fp):
    """Return a new database with the data read from the specified file.

    The entries produced by _parse() are turned into a tree: an entry that
    is indented further than the entry before it is stored as a child of
    that previous entry, any other entry is appended to the list that was
    last used for its indentation.
    """
    db = NumDB()
    # maps an indentation width to the list that collects entries found at
    # that width
    levels = {0: db.prefixes}
    previous = 0
    for indent, length, low, high, props in _parse(fp):
        if indent > previous:
            # the previous entry becomes the parent, give it a children
            # list if it does not have one yet
            parent = levels[previous][-1]
            if parent[4] is None:
                parent[4] = []
            levels[indent] = parent[4]
        levels[indent].append([length, low, high, props, None])
        previous = indent
    return db
|
||||
|
||||
|
||||
def get(name):
    """Opens a database with the specified name to perform queries on.

    On first use the name + '.dat' resource is looked up relative to this
    module, decoded as UTF-8 and parsed with read(). The resulting database
    is stored in the module-level cache and returned directly on later
    calls with the same name.
    """
    if name not in _open_databases:
        import codecs
        reader = codecs.getreader('utf-8')
        # leaving the with block closes the stream reader and with it the
        # underlying resource stream, so the file handle is not leaked
        with reader(resource_stream(__name__, name + '.dat')) as fp:
            _open_databases[name] = read(fp)
    return _open_databases[name]
|
||||
Loading…
Add table
Add a link
Reference in a new issue