update Shared

2015-11-04 13:01:55 +01:00 · 2015-11-04 13:01:55 +01:00 · 6881f3471a
commit 6881f3471a
parent e7ebbedd38
184 changed files with 13080 additions and 13691 deletions
--- a/Shared/lib/python3.4/site-packages/chardet/__init__.py
+++ b/Shared/lib/python3.4/site-packages/chardet/__init__.py
@@ -15,7 +15,7 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################

-__version__ = "2.2.1"
+__version__ = "2.3.0"
 from sys import version_info


--- a/Shared/lib/python3.4/site-packages/chardet/chardetect.py
+++ b/Shared/lib/python3.4/site-packages/chardet/chardetect.py
@@ -12,34 +12,68 @@ Example::
 If no paths are provided, it takes its input from stdin.

 """
-from io import open
-from sys import argv, stdin

+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import sys
+from io import open
+
+from chardet import __version__
 from chardet.universaldetector import UniversalDetector


-def description_of(file, name='stdin'):
-    """Return a string describing the probable encoding of a file."""
+def description_of(lines, name='stdin'):
+    """
+    Return a string describing the probable encoding of a file or
+    list of strings.
+
+    :param lines: The lines to get the encoding of.
+    :type lines: Iterable of bytes
+    :param name: Name of file or collection of lines
+    :type name: str
+    """
    u = UniversalDetector()
-    for line in file:
+    for line in lines:
        u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
-        return '%s: %s with confidence %s' % (name,
-                                              result['encoding'],
-                                              result['confidence'])
+        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
+                                                     result['confidence'])
    else:
-        return '%s: no result' % name
+        return '{0}: no result'.format(name)


-def main():
-    if len(argv) <= 1:
-        print(description_of(stdin))
-    else:
-        for path in argv[1:]:
-            with open(path, 'rb') as f:
-                print(description_of(f, path))
+def main(argv=None):
+    '''
+    Handles command line arguments and gets things started.
+
+    :param argv: List of arguments, as if specified on the command-line.
+                 If None, ``sys.argv[1:]`` is used instead.
+    :type argv: list of str
+    '''
+    # Get command line arguments
+    parser = argparse.ArgumentParser(
+        description="Takes one or more file paths and reports their detected \
+                     encodings",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        conflict_handler='resolve')
+    parser.add_argument('input',
+                        help='File whose encoding we would like to determine.',
+                        type=argparse.FileType('rb'), nargs='*',
+                        default=[sys.stdin])
+    parser.add_argument('--version', action='version',
+                        version='%(prog)s {0}'.format(__version__))
+    args = parser.parse_args(argv)
+
+    for f in args.input:
+        if f.isatty():
+            print("You are running chardetect interactively. Press " +
+                  "CTRL-D twice at the start of a blank line to signal the " +
+                  "end of your input. If you want help, run chardetect " +
+                  "--help\n", file=sys.stderr)
+        print(description_of(f, f.name))


 if __name__ == '__main__':
--- a/Shared/lib/python3.4/site-packages/chardet/jpcntx.py
+++ b/Shared/lib/python3.4/site-packages/chardet/jpcntx.py
@@ -177,6 +177,12 @@ class JapaneseContextAnalysis:
        return -1, 1

 class SJISContextAnalysis(JapaneseContextAnalysis):
+    def __init__(self):
+        self.charset_name = "SHIFT_JIS"
+
+    def get_charset_name(self):
+        return self.charset_name
+
    def get_order(self, aBuf):
        if not aBuf:
            return -1, 1
@@ -184,6 +190,8 @@ class SJISContextAnalysis(JapaneseContextAnalysis):
        first_char = wrap_ord(aBuf[0])
        if ((0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC)):
            charLen = 2
+            if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
+                self.charset_name = "CP932"
        else:
            charLen = 1

--- a/Shared/lib/python3.4/site-packages/chardet/latin1prober.py
+++ b/Shared/lib/python3.4/site-packages/chardet/latin1prober.py
@@ -129,11 +129,11 @@ class Latin1Prober(CharSetProber):
        if total < 0.01:
            confidence = 0.0
        else:
-            confidence = ((self._mFreqCounter[3] / total)
-                          - (self._mFreqCounter[1] * 20.0 / total))
+            confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
+                          / total)
        if confidence < 0.0:
            confidence = 0.0
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
-        confidence = confidence * 0.5
+        confidence = confidence * 0.73
        return confidence
--- a/Shared/lib/python3.4/site-packages/chardet/mbcssm.py
+++ b/Shared/lib/python3.4/site-packages/chardet/mbcssm.py
@@ -353,7 +353,7 @@ SJIS_cls = (
    2,2,2,2,2,2,2,2,  # 68 - 6f
    2,2,2,2,2,2,2,2,  # 70 - 77
    2,2,2,2,2,2,2,1,  # 78 - 7f
-    3,3,3,3,3,3,3,3,  # 80 - 87
+    3,3,3,3,3,2,2,3,  # 80 - 87
    3,3,3,3,3,3,3,3,  # 88 - 8f
    3,3,3,3,3,3,3,3,  # 90 - 97
    3,3,3,3,3,3,3,3,  # 98 - 9f
@@ -369,9 +369,8 @@ SJIS_cls = (
    2,2,2,2,2,2,2,2,  # d8 - df
    3,3,3,3,3,3,3,3,  # e0 - e7
    3,3,3,3,3,4,4,4,  # e8 - ef
-    4,4,4,4,4,4,4,4,  # f0 - f7
-    4,4,4,4,4,0,0,0   # f8 - ff
-)
+    3,3,3,3,3,3,3,3,  # f0 - f7
+    3,3,3,3,3,0,0,0)  # f8 - ff


 SJIS_st = (
@@ -571,5 +570,3 @@ UTF8SMModel = {'classTable': UTF8_cls,
               'stateTable': UTF8_st,
               'charLenTable': UTF8CharLenTable,
               'name': 'UTF-8'}
-
-# flake8: noqa
--- a/Shared/lib/python3.4/site-packages/chardet/sjisprober.py
+++ b/Shared/lib/python3.4/site-packages/chardet/sjisprober.py
@@ -47,7 +47,7 @@ class SJISProber(MultiByteCharSetProber):
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
-        return "SHIFT_JIS"
+        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        aLen = len(aBuf)
--- a/Shared/lib/python3.4/site-packages/chardet/universaldetector.py
+++ b/Shared/lib/python3.4/site-packages/chardet/universaldetector.py
@@ -71,9 +71,9 @@ class UniversalDetector:

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
-            if aBuf[:3] == codecs.BOM:
+            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
-                self.result = {'encoding': "UTF-8", 'confidence': 1.0}
+                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}