cache read_url_utf8 too

2007-07-14 11:25:50 +00:00 · 2007-07-14 11:25:50 +00:00 · c434187130
commit c434187130
parent 23e27d1b36
1 changed files with 25 additions and 3 deletions
--- a/scrapeit/imdb.py
+++ b/scrapeit/imdb.py
@ -11,13 +11,35 @@ from elementtree.ElementTree import parse, tostring
 from BeautifulSoup import BeautifulSoup
 from google import google
-from utils import stripTags, read_url_utf8, htmldecode
+from utils import stripTags, htmldecode
 import utils
 # Root directory of the on-disk page cache; the cached readers below join
 # the URL (with 'http://' removed) onto this path.
 cache_base = "/var/cache/scrapeit/cache/"
 def read_url_utf8(url):
   """Return the content of `url`, going through the on-disk cache.

   The cache path is `cache_base` joined with the URL after every
   'http://' occurrence has been removed. A path that ends in '/' or that
   names an existing directory is mapped to an 'index.html' entry inside it.

   On a cache hit the cached file is read and returned. On a miss the page
   is fetched with utils.read_url_utf8, written to the cache path (creating
   parent directories as needed) and returned.

   NOTE(review): a hit returns whatever open(path).read() yields, while a
   miss returns the value from utils.read_url_utf8 unchanged -- confirm both
   have the type callers expect.
   """
   path = os.path.join(cache_base, url.replace('http://',''))
   if path.endswith('/'):
     path = "%sindex.html" % path
   if os.path.isdir(path):
     path = "%s/index.html" % path

   if os.path.exists(path):
     f = open(path)
     try:
       # try/finally so the handle is released even if read() raises.
       return f.read()
     finally:
       f.close()

   data = utils.read_url_utf8(url)
   folder = os.path.dirname(path)
   if not os.path.exists(folder):
     try:
       os.makedirs(folder)
     except OSError:
       # Another process may have created the directory between the check
       # and makedirs(); only propagate when it is really missing.
       if not os.path.isdir(folder):
         raise
   f = open(path, 'w')
   try:
     f.write(data)
   finally:
     f.close()
   return data
 def read_url(url):
-  base = "/var/cache/scrapeit/cache/"
+  path = os.path.join(cache_base, url.replace('http://',''))
  path = os.path.join(base, url.replace('http://',''))
  if path.endswith('/'):
    path = "%sindex.html" % path
  if os.path.isdir(path):