avoid variable collision, save url

j 2011-11-01 22:53:36 +01:00
parent e290438c75
commit 2b7268157b
2 changed files with 5 additions and 4 deletions

File 1 of 2:
@@ -260,7 +260,6 @@ class FileCache(Cache):
         domain = ".".join(urlparse.urlparse(url)[1].split('.')[-2:])
         prefix, i, f = self.files(domain, url_hash)
         if os.path.exists(i):
             with open(i) as _i:
                 info = json.load(_i)
@@ -298,6 +297,7 @@ class FileCache(Cache):
             'only_headers': data == -1,
             'created': created,
             'headers': headers,
+            'url': url,
         }
         if post_data:
             info['post_data'] = post_data

File 2 of 2:
@@ -15,7 +15,7 @@ def getId(url):
 def getUrl(id):
     return "http://www.criterion.com/films/%s" % id
 
-def getData(id, timeout=ox.cache.cache_timeout, imdb=False):
+def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
     '''
     >>> getData('1333')['imdbId']
     u'0060304'
@@ -70,7 +70,7 @@ def getData(id, timeout=ox.cache.cache_timeout, imdb=False):
     if timeout == ox.cache.cache_timeout:
         timeout = -1
-    if imdb:
+    if get_imdb:
         data['imdbId'] = imdb.getMovieId(data['title'],
             data['director'], data['year'], timeout=timeout)
     return data
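
Note on the rename: in the old signature the keyword flag "imdb" shadowed the imported imdb module inside getData, so imdb.getMovieId(...) was looked up on the boolean flag rather than on the module. A minimal sketch of the collision, with a hypothetical stub standing in for the real imdb module:

class imdb:
    # Hypothetical stub standing in for the imported imdb module.
    @staticmethod
    def getMovieId(title, director, year, timeout=-1):
        return u'0060304'

def getData_old(id, imdb=False):
    if imdb:
        # The parameter shadows the module: here imdb is the bool True,
        # so this raises AttributeError instead of doing the lookup.
        return imdb.getMovieId('title', 'director', 1960)

def getData_new(id, get_imdb=False):
    if get_imdb:
        # The renamed flag no longer hides the module; the call resolves.
        return imdb.getMovieId('title', 'director', 1960)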
@@ -87,7 +87,8 @@ def getIds():
 
 def getIdsByPage(page):
     ids = []
-    html = readUrlUnicode("http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page)
+    url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
+    html = readUrlUnicode(url)
    results = re.compile("films/(\d+)").findall(html)
     for result in results:
         ids.append(result)
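
Binding the URL to its own variable also keeps it available after the fetch, e.g. for error reporting. A hypothetical variant of the same pattern (readUrlUnicode is stubbed here for illustration):

def readUrlUnicode(url):
    # Stub for illustration; the real helper fetches and decodes the page.
    raise IOError("network unavailable in this sketch")

page = 1
url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
try:
    html = readUrlUnicode(url)
except IOError:
    print("failed to fetch %s" % url)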