Switch to python3

2014-09-30 18:15:32 +02:00 · 2014-09-30 18:15:32 +02:00 · 9ba4b6a91a
commit 9ba4b6a91a
parent 531041e89a
5286 changed files with 677347 additions and 576888 deletions
--- a/Shared/lib/python3.4/site-packages/stdnum/numdb.py
+++ b/Shared/lib/python3.4/site-packages/stdnum/numdb.py
@ -0,0 +1,199 @@
+# numdb.py - module for handling hierarchically organised numbers
+#
+# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301 USA
+
+"""Query structured number format files with number properties.
+
+This module contains functions for reading and querying a database that
+stores numbers that use a hierarchical format (e.g. ISBN, IBAN, phone
+numbers, etc).
+
+To read a database from a file:
+
+>>> dbfile = read(open('numdb-test.dat', 'r'))
+
+To split a number:
+
+>>> dbfile.split('01006')
+['0', '100', '6']
+>>> dbfile.split('902006')
+['90', '20', '06']
+>>> dbfile.split('909856')
+['90', '985', '6']
+
+To split the number and get properties for each part:
+
+>>> dbfile.info('01006') == [
+...     ('0',   {'prop1': 'foo'}),
+...     ('100', {'prop2': 'bar'}),
+...     ('6',   {}),
+... ]
+True
+>>> dbfile.info('02006') == [
+...     ('0',   {'prop1': 'foo'}),
+...     ('200', {'prop2': 'bar', 'prop3': 'baz'}),
+...     ('6',   {}),
+... ]
+True
+>>> dbfile.info('03456') == [
+...     ('0', {'prop1': 'foo'}),
+...     ('345', {'prop2': 'bar', 'prop3': 'baz'}),
+...     ('6', {}),
+... ]
+True
+>>> dbfile.info('902006') == [
+...     ('90', {'prop1': 'booz'}),
+...     ('20', {'prop2': 'foo'}),
+...     ('06', {}),
+... ]
+True
+>>> dbfile.info('909856') == [
+...     ('90', {'prop1': 'booz'}),
+...     ('985', {'prop2': 'fooz'}),
+...     ('6', {}),
+... ]
+True
+>>> dbfile.info('9889') == [
+...     ('98', {'prop1': 'booz'}),
+...     ('89', {'prop2': 'foo'}),
+... ]
+True
+>>> dbfile.info('633322') == [
+...     ('6', {'prop1': 'boo'}),
+...     ('333', {'prop2': 'bar', 'prop3': 'baz'}),
+...     ('22', {}),
+... ]
+True
+
+"""
+
+import re
+from pkg_resources import resource_stream
+
+# Matches one data line: leading indentation (spaces), a comma-separated
+# list of values or low-high ranges, and the remaining text holding the
+# key="value" properties. Raw strings are used so that \s reaches the regex
+# engine instead of being an invalid string escape sequence.
+_line_re = re.compile(r'^(?P<indent> *)(?P<ranges>([^-,\s]+(-[^-,\s]+)?)(,[^-,\s]+(-[^-,\s]+)?)*)\s*(?P<props>.*)$')
+# Matches a single key="value" property.
+_prop_re = re.compile(r'(?P<prop>[0-9a-zA-Z-_]+)="(?P<value>[^"]*)"')
+
+# this is a cache of open databases
+_open_databases = {}
+
+# the prefixes attribute of NumDB is structured as follows:
+# prefixes = [
+#   [ length, low, high, props, children ]
+#   ...
+# ]
+# where children is a prefixes structure in its own right
+# (there is no expected ordering within the list)
+
+
+class NumDB(object):
+    """Tree of number prefixes used to split numbers into components.
+
+    The prefixes attribute is a list of [length, low, high, props, children]
+    entries, where children is either None or a nested list of the same
+    shape that is applied to the remainder of the number.
+    """
+
+    def __init__(self):
+        self.prefixes = []
+
+    @staticmethod
+    def _merge(results):
+        """Combine several candidate results into one (this is a generator).
+
+        Each candidate is a list of (part, props) tuples. For every position
+        the shortest part among the candidates is yielded together with the
+        union of the properties found at that position (on duplicate keys
+        the value of the later candidate wins).
+        """
+        # pad the shorter candidates with None so they can be zipped
+        longest = max(len(candidate) for candidate in results)
+        padded = [candidate + [None] * (longest - len(candidate))
+                  for candidate in results]
+        for column in zip(*padded):
+            # drop the padding entries for this position
+            present = [entry for entry in column if entry]
+            shortest = min((part for part, _ in present), key=len)
+            merged = {}
+            for _, props in present:
+                merged.update(props)
+            yield shortest, merged
+
+    @staticmethod
+    def _find(number, prefixes):
+        """Recursively match the number against the list of prefixes.
+
+        Returns a list of (part, props) tuples: an empty list for an empty
+        number, and a single (number, {}) entry when no prefix matches.
+        """
+        if not number:
+            return []
+        candidates = []
+        # prefixes is None for entries without children
+        for length, low, high, props, children in prefixes or ():
+            head = number[:length]
+            if low <= head <= high and len(number) >= length:
+                rest = NumDB._find(number[length:], children)
+                candidates.append([(head, props)] + rest)
+        if candidates:
+            # several prefixes may match; fold them into a single result
+            return list(NumDB._merge(candidates))
+        # not-found fallback
+        return [(number, {})]
+
+    def info(self, number):
+        """Split the provided number in components and associate properties
+        with each component. This returns a list of tuples. Each tuple
+        consists of a string (a part of the number) and a dict of properties.
+        """
+        return NumDB._find(number, self.prefixes)
+
+    def split(self, number):
+        """Split the provided number in components. This returns a list of
+        strings, one for each component identified."""
+        return [component[0] for component in self.info(number)]
+
+
+def _parse(fp):
+    """Read lines of text from the file pointer and generate indent, length,
+    low, high, properties tuples.
+
+    Blank lines and lines starting with '#' are skipped. A line may list
+    several comma-separated values or low-high ranges; one tuple is
+    generated per listed range and all tuples of a line share the same
+    properties dict. The length is that of the low bound of the range.
+
+    Raises ValueError for a line that does not match the expected format.
+    """
+    for line in fp:
+        # ignore comments and blank lines (startswith also copes with '')
+        if line.startswith('#') or not line.strip():
+            continue  # pragma: no cover (optimisation takes it out)
+        # any other line should parse
+        match = _line_re.search(line)
+        if match is None:
+            raise ValueError('unparsable line: %r' % line)
+        indent = len(match.group('indent'))
+        ranges = match.group('ranges')
+        props = dict(_prop_re.findall(match.group('props')))
+        for rnge in ranges.split(','):
+            if '-' in rnge:
+                low, high = rnge.split('-')
+            else:
+                # a single value is a range with equal bounds
+                low, high = rnge, rnge
+            yield indent, len(low), low, high, props
+
+
+def read(fp):
+    """Build a new database from the lines of the specified file.
+
+    The indentation of each parsed entry determines its place in the tree:
+    an entry that is indented deeper than the previous one becomes a child
+    of that previous entry.
+    """
+    db = NumDB()
+    # maps an indentation width to the list that entries at that width go in
+    levels = {0: db.prefixes}
+    previous = 0
+    for indent, length, low, high, props in _parse(fp):
+        if indent > previous:
+            # descend: attach a children list to the last entry seen
+            parent = levels[previous][-1]
+            if parent[4] is None:
+                parent[4] = []
+            levels[indent] = parent[4]
+        levels[indent].append([length, low, high, props, None])
+        previous = indent
+    return db
+
+
+def get(name):
+    """Opens a database with the specified name to perform queries on.
+
+    The database is read from the package resource name + '.dat', decoded
+    as UTF-8, and kept in the module-level cache so every name is only
+    loaded once.
+    """
+    if name not in _open_databases:
+        import codecs
+        reader = codecs.getreader('utf-8')
+        # close the underlying resource stream once the data has been read
+        with reader(resource_stream(__name__, name + '.dat')) as fp:
+            _open_databases[name] = read(fp)
+    return _open_databases[name]