openmedialibrary_platform/Shared/lib/python3.4/site-packages/chardet/chardetect.py

81 lines
2.4 KiB
Python
Raw Normal View History

2013-10-11 17:28:32 +00:00
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""
2015-11-04 12:01:55 +00:00
from __future__ import absolute_import, print_function, unicode_literals
import argparse
import sys
2013-10-11 17:28:32 +00:00
from io import open
2015-11-04 12:01:55 +00:00
from chardet import __version__
2013-10-11 17:28:32 +00:00
from chardet.universaldetector import UniversalDetector
2015-11-04 12:01:55 +00:00
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Each item of ``lines`` is fed to a fresh ``UniversalDetector`` until the
    input is exhausted or the detector reports that it is done.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when an
              encoding was detected, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    detector = UniversalDetector()
    for line in lines:
        detector.feed(line)
        # Stop reading as soon as the detector has reached a conclusion
        # (e.g. after seeing a BOM); this follows chardet's documented
        # incremental-detection pattern and avoids scanning the rest of a
        # potentially large input for nothing.
        if detector.done:
            break
    detector.close()
    result = detector.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
2013-10-11 17:28:32 +00:00
2015-11-04 12:01:55 +00:00
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Every input file is passed to ``description_of`` and the resulting line
    is printed to stdout. When no paths are given, standard input is read.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # On Python 3 ``sys.stdin`` is a text stream that yields ``str``; the
    # detector needs raw bytes, so use the underlying binary buffer when one
    # exists. Streams without a ``buffer`` attribute (Python 2 file objects,
    # test doubles) are used unchanged.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                print("You are running chardetect interactively. Press "
                      "CTRL-D twice at the start of a blank line to signal "
                      "the end of your input. If you want help, run "
                      "chardetect --help\n", file=sys.stderr)
            print(description_of(f, f.name))
        finally:
            # argparse.FileType opened these handles for us; release them
            # once processed, but never close the process-wide stdin.
            if f is not sys.stdin and f is not stdin_bytes:
                f.close()
2013-10-11 17:28:32 +00:00
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()