from collections import OrderedDict


class Decoder(object):
    """
    Bencode decoder that walks a bytes/bytearray buffer with a cursor.

    ``decode`` stores the input in ``self.data`` and the read position in
    ``self.idx``. Each ``_decode_*`` helper consumes exactly one value
    starting at ``self.idx`` and leaves ``self.idx`` just past it.
    """

    def _decode_int(self):
        """
        decode integer (``i<digits>e``) at the current position
        return int
        """
        start = self.idx + 1  # skip the leading 'i'
        end = self.data.index(b'e', start)
        self.idx = end + 1
        return int(self.data[start:end])

    def _decode_str(self):
        """
        decode string (``<length>:<payload>``) at the current position
        return str if the payload is valid UTF-8, otherwise the raw bytes
        raise Exception if the length prefix is negative
        """
        colon = self.data.index(b':', self.idx)
        size = int(self.data[self.idx:colon].decode(), 10)
        # A zero length is a valid (empty) string; only negatives are bogus.
        if size < 0:
            raise Exception('invalid string size: %d' % size)
        start = colon + 1
        end = start + size
        # bytes() keeps the non-UTF-8 fallback hashable when the input is a
        # bytearray, so it can still be used as a dict key.
        ret = bytes(self.data[start:end])
        try:
            ret = ret.decode('utf-8')
        except UnicodeDecodeError:
            pass  # not text: hand back the raw bytes
        self.idx = end
        return ret

    def _decode_list(self):
        """
        decode list (``l<items>e``) at the current position
        return list
        """
        ls = []
        self.idx += 1  # skip the leading 'l'
        while self.data[self.idx] != ord(b'e'):
            ls.append(self._decode())
        self.idx += 1  # skip the closing 'e'
        return ls

    def _decode_dict(self):
        """
        decode dict (``d<key><value>...e``) at the current position
        return OrderedDict, entries in the order they appear in the data
        """
        d = OrderedDict()
        self.idx += 1  # skip the leading 'd'
        while self.data[self.idx] != ord(b'e'):
            k = self._decode_str()
            d[k] = self._decode()
        self.idx += 1  # skip the closing 'e'
        return d

    def _decode(self):
        """
        decode whatever value starts at the current position
        return the decoded object
        raise Exception if the leading byte does not start a known type
        """
        ch = chr(self.data[self.idx])
        if ch == 'l':
            return self._decode_list()
        elif ch == 'i':
            return self._decode_int()
        elif ch == 'd':
            return self._decode_dict()
        elif ch.isdigit():
            return self._decode_str()
        else:
            # Report the undecodable remainder; the bare name `data` is not
            # defined in this method.
            raise Exception(
                'could not decode data: %s' % self.data[self.idx:])

    def decode(self, data):
        """
        decode a complete bencoded buffer
        return decoded object
        raise Exception if the cursor does not end exactly at the end of data
        """
        self.idx = 0
        self.data = data
        obj = self._decode()
        if len(data) != self.idx:
            raise Exception('failed to decode, extra data: %s' % data)
        return obj


def bdecode(data):
    """
    decode a bytearray
    return decoded object
    """
    return Decoder().decode(data)
def _encode_dict(d, buff):
    """
    encode dict

    Appends b'd' to buff, passes each key and its value to _encode in
    sorted key order, then appends b'e'. Keys that are not bytes or str
    are converted with str() before being encoded.
    """
    buff.append(b'd')
    for k in sorted(d):
        # isinstance() needs a type or a tuple of types (a list raises
        # TypeError). Encode the converted key under its own name so that
        # `k` still matches the dict entry for the value lookup below.
        name = k if isinstance(k, (bytes, str)) else str(k)
        _encode(name, buff)
        _encode(d[k], buff)
    buff.append(b'e')