From 62f5e84642a3a50acb0c4740f4e17377f3fb4c27 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Fri, 17 Aug 2012 22:20:35 +0200
Subject: [PATCH] fix ox.cache.read_url

---
 ox/cache.py    | 32 +++++++++++++++++---------------
 ox/net.py      | 12 ++++++------
 ox/web/imdb.py |  2 +-
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/ox/cache.py b/ox/cache.py
index 3305177..37b5b93 100644
--- a/ox/cache.py
+++ b/ox/cache.py
@@ -68,6 +68,11 @@ class InvalidResult(Exception):
         self.result = result
         self.headers = headers
 
+def _fix_unicode_url(url):
+    if isinstance(url, unicode):
+        url = url.encode('utf-8')
+    return url
+
 def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, valid=None, unicode=False):
     '''
     url - url to load
@@ -78,29 +83,27 @@ def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, val
     if this function fails, InvalidResult will be raised deal with it in your code
     '''
     #FIXME: send last-modified / etag from cache and only update if needed
-    if isinstance(url, unicode):
-        url = url.encode('utf-8')
-    data = store.get(url, data, headers, timeout)
-    if not data:
-        #print "get data", url
+    url = _fix_unicode_url(url)
+    result = store.get(url, data, headers, timeout)
+    if not result:
         try:
-            url_headers, data = net.read_url(url, data, headers, return_headers=True)
+            url_headers, result = net.read_url(url, data, headers, return_headers=True)
         except urllib2.HTTPError, e:
             e.headers['Status'] = "%s" % e.code
             url_headers = dict(e.headers)
-            data = e.read()
+            result = e.read()
         if url_headers.get('content-encoding', None) == 'gzip':
-            data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
-        if not valid or valid(data, url_headers):
-            store.set(url, data, data, url_headers)
+            result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
+        if not valid or valid(result, url_headers):
+            store.set(url, post_data=data, data=result, headers=url_headers)
         else:
-            raise InvalidResult(data, url_headers)
+            raise InvalidResult(result, url_headers)
     if unicode:
-        encoding = detect_encoding(data)
+        encoding = detect_encoding(result)
         if not encoding:
             encoding = 'latin-1'
-        data = data.decode(encoding)
-    return data
+        result = result.decode(encoding)
+    return result
 
 def save_url(url, filename, overwrite=False):
     if not os.path.exists(filename) or overwrite:
@@ -169,7 +172,6 @@ class SQLiteCache(Cache):
         r = None
         if timeout == 0:
             return r
-
         if data:
             url_hash = hashlib.sha1(url + '?' + data).hexdigest()
         else:
diff --git a/ox/net.py b/ox/net.py
index 60d6394..390755a 100644
--- a/ox/net.py
+++ b/ox/net.py
@@ -52,19 +52,19 @@ def open_url(url, data=None, headers=DEFAULT_HEADERS):
 
 def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
     f = open_url(url, data, headers)
-    data = f.read()
+    result = f.read()
     f.close()
     if f.headers.get('content-encoding', None) == 'gzip':
-        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
+        result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read()
     if unicode:
-        encoding = detect_encoding(data)
+        encoding = detect_encoding(result)
         if not encoding:
             encoding = 'latin-1'
-        data = data.decode(encoding)
+        result = result.decode(encoding)
     if return_headers:
         f.headers['Status'] = "%s" % f.code
-        return dict(f.headers), data
-    return data
+        return dict(f.headers), result
+    return result
 
 def detect_encoding(data):
     if 'content="text/html; charset=utf-8"' in data:
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 0da40b4..0fc989d 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -264,7 +264,7 @@ class Imdb(SiteParser):
     }
 
     def read_url(self, url, timeout):
-        return read_url(url, timeout, unicode=True)
+        return read_url(url, timeout=timeout, unicode=True)
 
     def __init__(self, id, timeout=-1):
         #use akas.imdb.com to always get original title:
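Note on the fix (appended after the diff, not part of the patch): in the old ox/cache.py code the parameter `data` (the POST payload) was reassigned to the fetched response body, so `store.set(url, data, data, url_headers)` stored the response as both the request's POST payload and the cached data; the patch renames the response to `result` and calls `store.set` with keyword arguments to keep the two apart. The ox/web/imdb.py change fixes the related positional-argument bug: `data` is the second parameter of ox.cache.read_url, so `read_url(url, timeout)` bound the timeout value to `data`. Below is a minimal Python 2 sketch of that mix-up, using a stand-in function with the same parameter order; the body and the example URL are illustrative, not the library's.

    # Stand-in mirroring the parameter order of ox.cache.read_url;
    # it just reports where each argument landed (illustrative only).
    def read_url(url, data=None, headers=None, timeout='default',
                 valid=None, unicode=False):
        return {'url': url, 'data': data, 'timeout': timeout}

    # Before the patch: -1 was passed positionally and bound to `data`,
    # leaving the cache timeout at its default.
    print read_url('http://example.com/', -1, unicode=True)
    # {'url': 'http://example.com/', 'data': -1, 'timeout': 'default'}

    # After the patch: the timeout is passed by keyword and reaches
    # the intended parameter.
    print read_url('http://example.com/', timeout=-1, unicode=True)
    # {'url': 'http://example.com/', 'data': None, 'timeout': -1}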