Switch to python3
This commit is contained in:
parent
531041e89a
commit
9ba4b6a91a
5286 changed files with 677347 additions and 576888 deletions
155
Shared/lib/python3.4/site-packages/stdnum/util.py
Normal file
155
Shared/lib/python3.4/site-packages/stdnum/util.py
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
# util.py - common utility functions
|
||||
# coding: utf-8
|
||||
#
|
||||
# Copyright (C) 2012, 2013 Arthur de Jong
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
|
||||
"""Common utility functions for other stdnum modules.
|
||||
|
||||
This module is meant for internal use by stdnum modules and is not
|
||||
guaranteed to remain stable and as such not part of the public API of
|
||||
stdnum.
|
||||
"""
|
||||
|
||||
import pkgutil
|
||||
import pydoc
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
from stdnum.exceptions import *
|
||||
|
||||
|
||||
_strip_doctest_re = re.compile('^>>> .*\Z', re.DOTALL | re.MULTILINE)
|
||||
|
||||
|
||||
def _mk_char_map(mapping):
|
||||
"""Transform a dictionary with comma separated uniode chracter names
|
||||
to tuples with unicode characters as key."""
|
||||
for key, value in mapping.items():
|
||||
for char in key.split(','):
|
||||
try:
|
||||
yield (unicodedata.lookup(char), value)
|
||||
except KeyError: # pragma: no cover (does not happen on Python3)
|
||||
pass
|
||||
|
||||
|
||||
# Build the mapping of Unicode characters to their equivalent ASCII
# character. Each key is a comma separated list of Unicode character names
# (resolved by _mk_char_map()); the value is the ASCII replacement shared
# by every character in that list.
_char_map = dict(_mk_char_map({
    'HYPHEN-MINUS,ARMENIAN HYPHEN,HEBREW PUNCTUATION MAQAF,HYPHEN,'
    'NON-BREAKING HYPHEN,FIGURE DASH,EN DASH,EM DASH,HORIZONTAL BAR,'
    'SMALL HYPHEN-MINUS,FULLWIDTH HYPHEN-MINUS,MONGOLIAN NIRUGU,OVERLINE,'
    'HYPHEN BULLET,MACRON,MODIFIER LETTER MINUS SIGN,FULLWIDTH MACRON,'
    'OGHAM SPACE MARK,SUPERSCRIPT MINUS,SUBSCRIPT MINUS,MINUS SIGN,'
    'HORIZONTAL LINE EXTENSION,HORIZONTAL SCAN LINE-1,HORIZONTAL SCAN LINE-3,'
    'HORIZONTAL SCAN LINE-7,HORIZONTAL SCAN LINE-9,STRAIGHTNESS': '-',
    # 'ARABIC FIVE POINTED STAR' used to be listed twice in this group; the
    # second occurrence was redundant and has been dropped.
    'ASTERISK,ARABIC FIVE POINTED STAR,SYRIAC HARKLEAN ASTERISCUS,'
    'FLOWER PUNCTUATION MARK,VAI FULL STOP,SMALL ASTERISK,FULLWIDTH ASTERISK,'
    'ASTERISK OPERATOR,STAR OPERATOR,HEAVY ASTERISK,LOW ASTERISK,'
    'OPEN CENTRE ASTERISK,EIGHT SPOKED ASTERISK,SIXTEEN POINTED ASTERISK,'
    'TEARDROP-SPOKED ASTERISK,OPEN CENTRE TEARDROP-SPOKED ASTERISK,'
    'HEAVY TEARDROP-SPOKED ASTERISK,EIGHT TEARDROP-SPOKED PROPELLER ASTERISK,'
    'HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK': '*',
    'COMMA,ARABIC COMMA,SINGLE LOW-9 QUOTATION MARK,IDEOGRAPHIC COMMA,'
    'ARABIC DECIMAL SEPARATOR,ARABIC THOUSANDS SEPARATOR,PRIME,RAISED COMMA,'
    'PRESENTATION FORM FOR VERTICAL COMMA,SMALL COMMA,'
    'SMALL IDEOGRAPHIC COMMA,FULLWIDTH COMMA,CEDILLA': ',',
    'FULL STOP,MIDDLE DOT,GREEK ANO TELEIA,ARABIC FULL STOP,'
    'IDEOGRAPHIC FULL STOP,SYRIAC SUPRALINEAR FULL STOP,'
    'SYRIAC SUBLINEAR FULL STOP,SAMARITAN PUNCTUATION NEQUDAA,'
    'TIBETAN MARK INTERSYLLABIC TSHEG,TIBETAN MARK DELIMITER TSHEG BSTAR,'
    'RUNIC SINGLE PUNCTUATION,BULLET,ONE DOT LEADER,HYPHENATION POINT,'
    'WORD SEPARATOR MIDDLE DOT,RAISED DOT,KATAKANA MIDDLE DOT,'
    'SMALL FULL STOP,FULLWIDTH FULL STOP,HALFWIDTH KATAKANA MIDDLE DOT,'
    'AEGEAN WORD SEPARATOR DOT,PHOENICIAN WORD SEPARATOR,'
    'KHAROSHTHI PUNCTUATION DOT,DOT ABOVE,ARABIC SYMBOL DOT ABOVE,'
    'ARABIC SYMBOL DOT BELOW,BULLET OPERATOR,DOT OPERATOR': '.',
    'SOLIDUS,SAMARITAN PUNCTUATION ARKAANU,FULLWIDTH SOLIDUS,DIVISION SLASH,'
    'MATHEMATICAL RISING DIAGONAL,BIG SOLIDUS,FRACTION SLASH': '/',
    'COLON,ETHIOPIC WORDSPACE,RUNIC MULTIPLE PUNCTUATION,MONGOLIAN COLON,'
    'PRESENTATION FORM FOR VERTICAL COLON,FULLWIDTH COLON,'
    'PRESENTATION FORM FOR VERTICAL TWO DOT LEADER,SMALL COLON': ':',
    'SPACE,NO-BREAK SPACE,EN QUAD,EM QUAD,EN SPACE,EM SPACE,'
    'THREE-PER-EM SPACE,FOUR-PER-EM SPACE,SIX-PER-EM SPACE,FIGURE SPACE,'
    'PUNCTUATION SPACE,THIN SPACE,HAIR SPACE,NARROW NO-BREAK SPACE,'
    'MEDIUM MATHEMATICAL SPACE,IDEOGRAPHIC SPACE': ' ',
}))
|
||||
|
||||
|
||||
def _clean_chars(number):
    """Return number with every character that has an entry in _char_map
    replaced by its ASCII counterpart; other characters are kept as is."""
    translated = []
    for char in number:
        translated.append(_char_map.get(char, char))
    return ''.join(translated)
|
||||
|
||||
|
||||
def clean(number, deletechars=''):
    """Remove the specified characters from the supplied number.

    Various Unicode characters are first replaced by their ASCII
    counterpart (see _clean_chars()), after which every character that is
    found in deletechars is dropped from the result.

    Raises InvalidFormat if the number cannot be turned into a string, for
    example because it is not iterable.

    >>> clean('123-456:78 9', ' -:')
    '123456789'
    >>> clean('1–2—3―4')
    '1-2-3-4'

    """
    try:
        number = ''.join(x for x in number)
    except Exception:
        # Only ordinary errors mean "this is not a usable number"; a bare
        # except would also turn KeyboardInterrupt and SystemExit into
        # InvalidFormat.
        raise InvalidFormat()
    # Compare the numeric major version: the string test sys.version < '3'
    # would wrongly be true for a version string such as '10.0'.
    if sys.version_info[0] < 3 and isinstance(number, str):  # pragma: no cover (Python 2/3 specific code)
        # Python 2 byte string: try the default codec first, then UTF-8
        try:
            number = _clean_chars(number.decode()).encode()
        except UnicodeError:
            try:
                number = _clean_chars(number.decode('utf-8')).encode('utf-8')
            except UnicodeError:
                # deliberate best effort: bytes that cannot be decoded are
                # left untouched and only deletechars is applied below
                pass
    else:  # pragma: no cover (Python 2/3 specific code)
        number = _clean_chars(number)
    return ''.join(x for x in number if x not in deletechars)
|
||||
|
||||
|
||||
def get_number_modules(base='stdnum'):
    """Yield the modules below the specified package that have a validate
    attribute.

    The base package and every module found under it are imported; errors
    reported by the package walker are ignored."""
    __import__(base)
    package = sys.modules[base]
    walker = pkgutil.walk_packages(
        package.__path__,
        package.__name__ + '.',
        onerror=lambda x: None,
    )
    for _finder, fullname, _is_pkg in walker:
        __import__(fullname)
        candidate = sys.modules[fullname]
        if not hasattr(candidate, 'validate'):
            continue
        yield candidate
|
||||
|
||||
|
||||
def get_module_name(module):
    """Return the short description of the number: the summary line of
    the module documentation with leading and trailing dots removed."""
    docstring = pydoc.getdoc(module)
    synopsis = pydoc.splitdoc(docstring)[0]
    return synopsis.strip('.')
|
||||
|
||||
|
||||
def get_module_description(module):
    """Return a description of the number.

    The description is the part of the module documentation that follows
    the summary line, with the doctests removed and surrounding whitespace
    stripped. It is returned as a single string."""
    description = pydoc.splitdoc(pydoc.getdoc(module))[1]
    # remove the doctests (the whole description is passed, not just one
    # character of it, and a plain string is returned instead of a tuple)
    return _strip_doctest_re.sub('', description).strip()
|
||||
|
||||
|
||||
def get_module_list():
    """Yield one ' * name: short description' line for each module
    provided by get_number_modules(), with the 'stdnum.' part removed
    from the module name."""
    for module in get_number_modules():
        short_name = module.__name__.replace('stdnum.', '')
        yield ' * %s: %s' % (short_name, get_module_name(module))
|
||||
Loading…
Add table
Add a link
Reference in a new issue