# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
#
# Provenance: recovered from a whitespace-collapsed git patch
#   commit:  d17ae60f8ec369aa60930e63a5c207b92e770ea6
#   author:  Rolux
#   date:    Sun, 6 Jul 2008 20:12:30 +0200
#   subject: [PATCH] movieposterdb module
#   target:  oxweb/movieposterdb.py (new file, 32 insertions)
"""Collect poster image URLs from movieposterdb.com for a given movie id."""

import re

from oxlib import cache
from oxlib import findRe


def getPosterUrls(imdbId):
    """Return the list of poster URLs found for ``imdbId``.

    The movie page URL is built by appending ``imdbId`` to
    ``http://www.movieposterdb.com/movie/``.  The page is only parsed when
    ``cache.exists(url)`` is truthy; otherwise an empty list is returned.
    """
    url = 'http://www.movieposterdb.com/movie/%s' % imdbId
    if cache.exists(url):
        return parsePage(url)
    return []


def parsePage(url):
    """Return the poster URLs reachable from the page at ``url``.

    The page is fetched through ``cache.getUrlUnicode``.  Every "group" link
    found on it is parsed recursively, then every "poster" link is fetched
    and the first ``.jpg`` URL under ``/posters/`` that ``findRe`` reports is
    appended to the result.

    Progress is written to stdout with ``print``.
    """
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also parses on Python 3.
    print(url)
    posterUrls = []
    # 86400 s == 24 h; presumably the maximum cache age -- TODO confirm
    # against oxlib.cache.
    html = cache.getUrlUnicode(url, timeout=86400)

    # NOTE(review): both link patterns below are EMPTY in this copy of the
    # source -- most likely the HTML markup inside the literals was stripped
    # when the patch was extracted.  An empty pattern makes findall() return
    # one empty string per position in ``html``, so the recursion below is
    # called with '' as the URL.  Restore the original patterns from the
    # upstream repository before relying on this function.
    groups = re.compile('', re.DOTALL).findall(html)
    print('%s Groups' % len(groups))
    for group in groups:
        posterUrls += parsePage(group)

    posters = re.compile('', re.DOTALL).findall(html)
    for poster in posters:
        print('Poster: ' + poster)
        # Separate name so the group/poster page markup in ``html`` is not
        # rebound while iterating.
        posterHtml = cache.getUrlUnicode(poster)
        # NOTE(review): whatever findRe returns on a failed match is appended
        # as-is -- confirm callers tolerate that value.
        posterUrls.append(
            findRe(posterHtml,
                   r'"(http://www.movieposterdb.com/posters/.*?\.jpg)"'))
    return posterUrls


if __name__ == '__main__':
    print(getPosterUrls('0133093'))