2013-10-11 17:28:32 +00:00
|
|
|
"""JSON token scanner
|
|
|
|
"""
|
|
|
|
import re
|
|
|
|
def _import_c_make_scanner():
|
|
|
|
try:
|
|
|
|
from simplejson._speedups import make_scanner
|
|
|
|
return make_scanner
|
|
|
|
except ImportError:
|
|
|
|
return None
|
|
|
|
# C implementation of make_scanner when the _speedups extension could be
# imported; None otherwise (see _import_c_make_scanner).
c_make_scanner = _import_c_make_scanner()
|
|
|
|
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ['make_scanner', 'JSONDecodeError']
|
|
|
|
|
|
|
|
# Matches a JSON number and captures three groups:
#   1. the integer part (optional leading '-', no superfluous leading zeros)
#   2. the fraction part including the '.', or None
#   3. the exponent part including the 'e'/'E', or None
# Explicit [0-9] classes are used instead of \d: for str patterns on
# Python 3, \d matches every Unicode decimal digit (e.g. Arabic-Indic or
# fullwidth digits), which are not valid in a JSON number.
NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))
|
|
|
|
|
|
|
|
class JSONDecodeError(ValueError):
    """ValueError subclass raised when a JSON document cannot be decoded.

    In addition to the formatted message, instances carry:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    # Note that this exception is used from _speedups
    def __init__(self, msg, doc, pos, end=None):
        # The ValueError message is the fully formatted, position-annotated
        # text; the raw pieces are kept as attributes below.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg, self.doc, self.pos, self.end = msg, doc, pos, end
        self.lineno, self.colno = linecol(doc, pos)
        if end is None:
            self.endlineno = self.endcolno = None
        else:
            self.endlineno, self.endcolno = linecol(doc, end)

    def __reduce__(self):
        # Rebuild from the original constructor arguments so that pickling
        # round-trips through __init__ rather than the formatted message.
        ctor_args = (self.msg, self.doc, self.pos, self.end)
        return self.__class__, ctor_args
|
|
|
|
|
|
|
|
|
|
|
|
def linecol(doc, pos):
    """Translate a character offset into a ``(lineno, colno)`` pair.

    doc: the document being parsed (text or bytes)
    pos: index into doc

    Both the returned line number and column number are 1-based. Lines
    are delimited by ``'\\n'`` only.
    """
    # A str separator cannot be used to search a bytes document on
    # Python 3 (TypeError), so pick the newline literal matching doc.
    if isinstance(doc, bytes):
        newline = b'\n'
    else:
        newline = '\n'
    lineno = doc.count(newline, 0, pos) + 1
    if lineno == 1:
        # No newline precedes pos: the column is just the offset.
        colno = pos + 1
    else:
        # Column is the distance from the last newline before pos.
        colno = pos - doc.rindex(newline, 0, pos)
    return lineno, colno
|
|
|
|
|
|
|
|
|
|
|
|
def errmsg(msg, doc, pos, end=None):
    """Format *msg* with the line/column location of the failure in *doc*.

    Any ``%r`` in msg is replaced by the repr of the single item of doc
    at pos (an empty slice when pos is past the end). When end is given,
    the message covers the range from pos to end; otherwise only pos is
    reported.
    """
    start_line, start_col = linecol(doc, pos)
    text = msg.replace('%r', repr(doc[pos:pos + 1]))
    if end is not None:
        end_line, end_col = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            text, start_line, start_col, end_line, end_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        text, start_line, start_col, pos)
|
|
|
|
|
|
|
|
|
|
|
|
def py_make_scanner(context):
    """Build the pure-Python ``scan_once(string, idx)`` function.

    The parsing callables and options are read from *context* once and
    captured in the closure. The returned function decodes the single
    JSON value starting at ``string[idx]`` and returns a
    ``(value, end_index)`` pair, raising JSONDecodeError when no value
    starts there.
    """
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    match_number = NUMBER_RE.match
    encoding = context.encoding
    strict = context.strict
    parse_float = context.parse_float
    parse_int = context.parse_int
    parse_constant = context.parse_constant
    object_hook = context.object_hook
    object_pairs_hook = context.object_pairs_hook
    memo = context.memo

    def _scan_once(string, idx):
        # Recursive worker; also handed to the object/array parsers so
        # they can scan nested values.
        expecting = 'Expecting value'
        try:
            ch = string[idx]
        except IndexError:
            raise JSONDecodeError(expecting, string, idx)

        # Strings and containers are delegated to the context's parsers.
        if ch == '"':
            return parse_string(string, idx + 1, encoding, strict)
        if ch == '{':
            return parse_object(
                (string, idx + 1), encoding, strict,
                _scan_once, object_hook, object_pairs_hook, memo)
        if ch == '[':
            return parse_array((string, idx + 1), _scan_once)

        # Literal keywords.
        if ch == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        if ch == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        if ch == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        number = match_number(string, idx)
        if number is None:
            # Not a number: try the non-standard float constants before
            # giving up.
            if ch == 'N' and string[idx:idx + 3] == 'NaN':
                return parse_constant('NaN'), idx + 3
            if ch == 'I' and string[idx:idx + 8] == 'Infinity':
                return parse_constant('Infinity'), idx + 8
            if ch == '-' and string[idx:idx + 9] == '-Infinity':
                return parse_constant('-Infinity'), idx + 9
            raise JSONDecodeError(expecting, string, idx)

        integer, frac, exp = number.groups()
        if frac or exp:
            # Any fraction or exponent makes it a float.
            value = parse_float(integer + (frac or '') + (exp or ''))
        else:
            value = parse_int(integer)
        return value, number.end()

    def scan_once(string, idx):
        if idx < 0:
            # Ensure the same behavior as the C speedup, otherwise
            # this would work for *some* negative string indices due
            # to the behavior of __getitem__ for strings. #98
            raise JSONDecodeError('Expecting value', string, idx)
        try:
            return _scan_once(string, idx)
        finally:
            # The memo is emptied after every top-level scan, whether it
            # succeeded or raised.
            memo.clear()

    return scan_once
|
|
|
|
|
|
|
|
# Public entry point: use the C scanner when it was importable, otherwise
# fall back to the pure-Python implementation.
make_scanner = c_make_scanner or py_make_scanner
|