# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Collect poster image URLs for a movie from movieposterdb.com.

import re

from oxlib import cache
from oxlib import findRe


def getPosterUrls(imdbId):
    """Return the list of poster image URLs found for the movie *imdbId*.

    The movie page URL is built from *imdbId* and is only parsed when
    ``cache.exists`` reports a truthy value for it; otherwise an empty
    list is returned.
    """
    url = 'http://www.movieposterdb.com/movie/%s' % imdbId
    if not cache.exists(url):
        return []
    return parsePage(url)


def parsePage(url):
    """Return the poster image URLs reachable from the page at *url*.

    Every match of the group pattern is treated as another page and parsed
    recursively; every match of the poster pattern is fetched and searched
    for a ``.jpg`` address under ``/posters/``. Progress is printed to
    stdout. Poster pages without a matching address are skipped.
    """
    # Single-argument print(...) prints the same text under Python 2's print
    # statement and also parses under Python 3.
    print(url)
    posterUrls = []
    # NOTE(review): timeout is presumably a cache lifetime in seconds
    # (86400 = one day) -- confirm against oxlib.cache.
    html = cache.getUrlUnicode(url, timeout=86400)

    # NOTE(review): both patterns below are empty strings in this copy of the
    # file. An empty pattern matches at every position, so findall() yields
    # only empty strings. The real link patterns were probably lost in
    # transit -- restore them from the upstream repository before relying on
    # this function.
    groups = re.compile('', re.DOTALL).findall(html)
    print('%s Groups' % len(groups))
    for group in groups:
        posterUrls += parsePage(group)

    posters = re.compile('', re.DOTALL).findall(html)
    for poster in posters:
        print('Poster: ' + poster)
        # Keep the poster page markup in its own name instead of overwriting
        # the listing page's html.
        posterHtml = cache.getUrlUnicode(poster)
        posterUrl = findRe(
            posterHtml,
            r'"(http://www.movieposterdb.com/posters/.*?\.jpg)"')
        # Presumably findRe returns an empty/falsy value when nothing
        # matches; do not record that as a poster URL.
        if posterUrl:
            posterUrls.append(posterUrl)
    return posterUrls


if __name__ == '__main__':
    print(getPosterUrls('0133093'))