avoid deep recursion in bdecode3, brings performance closer to py2 version

This commit is contained in:
j 2016-08-30 10:16:18 +02:00
parent 5168459936
commit 4b3a449e82

View file

@ -3,85 +3,93 @@
# bencode.py python3 compatible bencode / bdecode # bencode.py python3 compatible bencode / bdecode
# #
## ##
from collections import OrderedDict
def _decode_int(data):
"""
decode integer from bytearray
return int, remaining data
"""
data = data[1:]
end = data.index(b'e')
return int(data[:end], 10), data[end+1:]
def _decode_str(data): class Decoder(object):
"""
decode string from bytearray
return string, remaining data
"""
start = data.index(b':')
l = int(data[:start].decode(), 10)
if l <= 0:
raise Exception('invalid string size: %d' % l)
start += 1
ret = bytes(data[start:start+l])
try:
ret = ret.decode('utf-8')
except:
pass
data = data[start+l:]
return ret, data
def _decode_list(data): def _decode_int(self):
""" """
decode list from bytearray decode integer from bytearray
return list, remaining data return int
""" """
ls = [] self.idx += 1
data = data[1:] start = self.idx
while data[0] != ord(b'e'): end = self.data.index(b'e', self.idx)
elem, data = _decode(data) self.idx = end + 1
ls.append(elem) return int(self.data[start:end])
return ls, data[1:]
def _decode_dict(data): def _decode_str(self):
""" """
decode dict from bytearray decode string from bytearray
return dict, remaining data return string
""" """
d = {} start = self.data.index(b':', self.idx)
data = data[1:] l = int(self.data[self.idx:start].decode(), 10)
while data[0] != ord(b'e'): if l <= 0:
k, data = _decode_str(data) raise Exception('invalid string size: %d' % l)
v, data = _decode(data) start += 1
d[k] = v ret = self.data[start:start+l]
return d, data[1:] try:
ret = ret.decode('utf-8')
except:
pass
self.idx = start + l
return ret
def _decode(data): def _decode_list(self):
""" """
decode a bytearray decode list from bytearray
return deserialized object, remaining data return list
""" """
ch = chr(data[0]) ls = []
if ch == 'l': self.idx += 1
return _decode_list(data) while self.data[self.idx] != ord(b'e'):
elif ch == 'i': ls.append(self._decode())
return _decode_int(data) self.idx += 1
elif ch == 'd': return ls
return _decode_dict(data)
elif ch.isdigit(): def _decode_dict(self):
return _decode_str(data) """
else: decode dict from bytearray
raise Exception('could not deserialize data: %s' % data) return dict
"""
d = OrderedDict()
self.idx += 1
while self.data[self.idx] != ord(b'e'):
k = self._decode_str()
v = self._decode()
d[k] = v
self.idx += 1
return d
def _decode(self):
ch = chr(self.data[self.idx])
if ch == 'l':
return self._decode_list()
elif ch == 'i':
return self._decode_int()
elif ch == 'd':
return self._decode_dict()
elif ch.isdigit():
return self._decode_str()
else:
raise Exception('could not decode data: %s' % data)
def decode(self, data):
    """
    decode one complete bencoded value from data

    stores data on the instance, resets the cursor to 0 and runs
    self._decode(); the cursor must end exactly at len(data)
    return decoded object
    raise Exception if the cursor does not end at len(data)
    """
    self.data = data
    self.idx = 0
    result = self._decode()
    if self.idx == len(data):
        return result
    raise Exception('failed to decode, extra data: %s' % data)
def bdecode(data): def bdecode(data):
""" """
decode a bytearray decode a bytearray
return deserialized object return decoded object
""" """
obj, data = _decode(data) return Decoder().decode(data)
if len(data) > 0:
raise Exception('failed to deserialize, extra data: %s' % data)
return obj
def _encode_str(s, buff): def _encode_str(s, buff):
""" """
@ -91,7 +99,7 @@ def _encode_str(s, buff):
l = len(s) l = len(s)
buff.append(bytearray(str(l)+':', 'utf-8')) buff.append(bytearray(str(l)+':', 'utf-8'))
buff.append(s) buff.append(s)
def _encode_int(i, buff): def _encode_int(i, buff):
""" """
encode integer to a buffer encode integer to a buffer
@ -114,10 +122,10 @@ def _encode_dict(d, buff):
encode dict encode dict
""" """
buff.append(b'd') buff.append(b'd')
l = list(d.keys()) for k in sorted(d):
l.sort() if not isinstance(k, [bytes, str]):
for k in l: k = str(k)
_encode(str(k), buff) _encode(k, buff)
_encode(d[k], buff) _encode(d[k], buff)
buff.append(b'e') buff.append(b'e')