fix fixunicode
This commit is contained in:
parent
cd9f49b771
commit
f5770f12d1
1 changed file with 6 additions and 3 deletions
|
@@ -6,7 +6,7 @@ from __future__ import print_function
|
||||||
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
from six import unichr
|
from six import unichr, PY3
|
||||||
|
|
||||||
__all__ = ['fix_bad_unicode']
|
__all__ = ['fix_bad_unicode']
|
||||||
|
|
||||||
|
@@ -75,7 +75,7 @@ def fix_bad_unicode(text):
|
||||||
>>> fix_bad_unicode('This text was never Unicode at all\x85')
|
>>> fix_bad_unicode('This text was never Unicode at all\x85')
|
||||||
'This text was never Unicode at all…'
|
'This text was never Unicode at all…'
|
||||||
"""
|
"""
|
||||||
if not isinstance(text, str):
|
if isinstance(text, bytes):
|
||||||
raise TypeError("This isn't even decoded into Unicode yet. "
|
raise TypeError("This isn't even decoded into Unicode yet. "
|
||||||
"Decode it first.")
|
"Decode it first.")
|
||||||
if len(text) == 0:
|
if len(text) == 0:
|
||||||
|
@@ -151,7 +151,10 @@ def text_badness(text):
|
||||||
- Improbable single-byte characters, such as ƒ or ¬
|
- Improbable single-byte characters, such as ƒ or ¬
|
||||||
- Letters in somewhat rare scripts
|
- Letters in somewhat rare scripts
|
||||||
'''
|
'''
|
||||||
|
if PY3:
|
||||||
assert isinstance(text, str)
|
assert isinstance(text, str)
|
||||||
|
else:
|
||||||
|
assert isinstance(text, unicode)
|
||||||
errors = 0
|
errors = 0
|
||||||
very_weird_things = 0
|
very_weird_things = 0
|
||||||
weird_things = 0
|
weird_things = 0
|
||||||
|
|
Loading…
Reference in a new issue