From c3c9c49788ece627c076fea2e883978345af1a73 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 1 Jun 2009 15:11:40 +0200 Subject: [PATCH] add possible validation option to getUrl --- oxlib/cache.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/oxlib/cache.py b/oxlib/cache.py index 097525a..eedc971 100644 --- a/oxlib/cache.py +++ b/oxlib/cache.py @@ -50,7 +50,22 @@ def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): _saveUrlHeaders(url_cache_file, url_headers) return url_headers -def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): +class InvalidResult(Exception): + """Raised by getUrl when the valid callback rejects a result.""" + def __init__(self, result, headers): + self.result = result + self.headers = headers + +def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, valid=None): + ''' + url - url to load + data - possible post data + headers - headers to send with request + timeout - get from cache if cache not older than given seconds, -1 to get from cache + valid - function to check if result is ok; it is passed result and headers. + If it returns a false value, InvalidResult is raised; handle it in your code. + ''' + #FIXME: send last-modified / etag from cache and only update if needed if isinstance(url, unicode): url = url.encode('utf-8') url_cache_file = _getUrlCacheFile(url, data, headers) @@ -64,11 +79,14 @@ def getUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): result = e.read() if url_headers.get('content-encoding', None) == 'gzip': result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() - _saveUrlCache(url_cache_file, result, url_headers) + if not valid or valid(result, url_headers): + _saveUrlCache(url_cache_file, result, url_headers) + else: + raise InvalidResult(result, url_headers) return result -def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl): - data = 
_getUrl(url, data, headers, timeout) +def getUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _getUrl=getUrl, valid=None): + data = _getUrl(url, data, headers, timeout, valid) encoding = getEncoding(data) if not encoding: encoding = 'latin-1'