From acb0e3f0e8837c0117567bce61f5a2634df87f97 Mon Sep 17 00:00:00 2001
From: Rolux
Date: Mon, 13 Jul 2009 23:12:06 +0200
Subject: [PATCH] updating movieposterdb module

---
 oxweb/movieposterdb.py | 48 ++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/oxweb/movieposterdb.py b/oxweb/movieposterdb.py
index 90be701..0eccb0c 100644
--- a/oxweb/movieposterdb.py
+++ b/oxweb/movieposterdb.py
@@ -3,28 +3,34 @@
 
 import re
 
-from oxlib import cache
+from oxlib.cache import getUrlUnicode
 from oxlib import findRe
 
-def getPosterUrls(imdbId):
-    url = 'http://www.movieposterdb.com/movie/%s' % imdbId
-    posterUrls = []
-    if cache.exists(url):
-        posterUrls = parsePage(url)
-    return posterUrls
-
-def parsePage(url):
-    posterUrls = []
-    html = cache.getUrlUnicode(url, timeout=86400)
-    groups = re.compile('', re.DOTALL).findall(html)
-    for group in groups:
-        posterUrls += parsePage(group)
-    posters = re.compile('', re.DOTALL).findall(html)
-    for poster in posters:
-        html = cache.getUrlUnicode(poster)
-        posterUrls.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"'))
-    return posterUrls
+def getData(id):
+    data = {
+        "url": getUrl(id)
+    }
+    data["posters"] = getPostersByUrl(data["url"])
+    return data
+
+def getId(url):
+    return url.split("/")[-2]
+
+def getPostersByUrl(url):
+    posters = []
+    html = getUrlUnicode(url)
+    results = re.compile('', re.DOTALL).findall(html)
+    for result in results:
+        posters += getPostersByUrl(result)
+    results = re.compile('', re.DOTALL).findall(html)
+    for result in results:
+        html = getUrlUnicode(result)
+        posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"'))
+    return posters
+
+def getUrl(id):
+    return "http://www.movieposterdb.com/movie/%s/" % id
 
 if __name__ == '__main__':
-    print getPosterUrls('0133093')
-    print getPosterUrls('0060304')
\ No newline at end of file
+    print getData('0060304')
+    print getData('0133093')