diff --git a/ox/cache.py b/ox/cache.py
index e5f3dc4..0a1383b 100644
--- a/ox/cache.py
+++ b/ox/cache.py
@@ -299,8 +299,8 @@ class FileCache(Cache):
 
     def files(self, domain, h):
         prefix = os.path.join(self.root, domain, h[:2], h[2:4], h[4:6], h[6:8])
-        i = os.path.join(prefix, '%s.json'%h)
-        f = os.path.join(prefix, '%s.dat'%h)
+        i = os.path.join(prefix, '%s.json' % h)
+        f = os.path.join(prefix, '%s.dat' % h)
         return prefix, i, f
 
     def get(self, url, data={}, headers=None, timeout=-1, value="data"):
@@ -370,8 +370,97 @@ class FileCache(Cache):
         with open(i, 'w') as _i:
             json.dump(info, _i)
 
+
+class KVCache(Cache):
+    _bytes_only = False
+
+    def _keys(self, url, data, headers=None):
+        url_hash = self.get_url_hash(url, data)
+        domain = self.get_domain(url)
+        key = 'ox:%s:%s' % (domain, url_hash)
+        return key, key + ':data'
+
+    def get(self, url, data={}, headers=None, timeout=-1, value="data"):
+        if timeout == 0:
+            return None
+
+        r = None
+        info_key, data_key = self._keys(url, data, headers)
+        info = self.backend.get(info_key)
+        if info:
+            if self._bytes_only:
+                info = json.loads(info.decode())
+            now = time.mktime(time.localtime())
+            expired = now-timeout
+
+            if value != 'headers' and info['only_headers']:
+                return None
+            if timeout < 0 or info['created'] > expired:
+                if value == 'headers':
+                    r = info['headers']
+                else:
+                    data = self.backend.get(data_key)
+                    if data:
+                        r = zlib.decompress(data)
+        return r
+
+    def delete(self, url, data=None, headers=None):
+        for key in self._keys(url, data, headers):
+            self.backend.delete(key)
+
+    def set(self, url, post_data, data, headers):
+        info_key, data_key = self._keys(url, post_data, headers)
+
+        created = time.mktime(time.localtime())
+        content_type = headers.get('content-type', '').split(';')[0].strip()
+
+        info = {
+            'compressed': content_type in COMPRESS_TYPES,
+            'only_headers': data == -1,
+            'created': created,
+            'headers': headers,
+            'url': url,
+        }
+        if post_data:
+            info['post_data'] = post_data
+        if not info['only_headers']:
+            if info['compressed']:
+                data = zlib.compress(data)
+            elif not isinstance(data, bytes):
+                data = data.encode('utf-8')
+            self.backend.set(data_key, data)
+        if self._bytes_only:
+            info = json.dumps(info, ensure_ascii=False).encode('utf-8')
+        self.backend.set(info_key, info)
+
+
+class MemCache(KVCache):
+    _bytes_only = False
+
+    def __init__(self):
+        import pylibmc
+
+        f, self.host = cache_path().split(':', 1)
+        self.backend = pylibmc.Client([self.host])
+        self.backend.behaviors['connect_timeout'] = 10000
+
+
+class RedisCache(KVCache):
+    _bytes_only = True
+
+    def __init__(self):
+        import redis
+
+        f, self.url = cache_path().split(':', 1)
+        self.backend = redis.from_url(self.url)
+
+
 
 if cache_path().startswith('fs:'):
     store = FileCache()
+elif cache_path().startswith('redis:'):
+    store = RedisCache()
+elif cache_path().startswith('memcache:'):
+    store = MemCache()
 else:
     store = SQLiteCache()
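
As a quick smoke test of the new Redis-backed cache, the backend can be wired up by hand instead of going through cache_path(), which is configured elsewhere in ox and is not part of this diff. The LocalRedisCache helper, the localhost Redis URL and the example.com entry below are illustrative only, and the sketch assumes 'text/html' is listed in COMPRESS_TYPES so the body round-trips through zlib:

    import redis

    from ox.cache import RedisCache


    class LocalRedisCache(RedisCache):
        """RedisCache wired to an explicit URL, bypassing cache_path()."""
        def __init__(self, url):
            self.backend = redis.from_url(url)


    store = LocalRedisCache('redis://localhost:6379')
    headers = {'content-type': 'text/html; charset=utf-8'}

    # set() stores the body and an info record under ox:<domain>:<hash> keys.
    store.set('http://example.com/', None, b'<html>hello</html>', headers)

    # get() returns the (decompressed) body, or the stored headers dict.
    print(store.get('http://example.com/', timeout=3600))
    print(store.get('http://example.com/', value='headers'))

    # delete() removes both the info key and the data key.
    store.delete('http://example.com/')

In normal use none of this plumbing is needed: the module-level store selection at the bottom of the diff picks RedisCache whenever cache_path() returns a value of the form redis:redis://…, and MemCache for memcache:….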