updating movieposterdb module

This commit is contained in:
Rolux 2009-07-13 23:12:06 +02:00
parent 4781644eac
commit acb0e3f0e8

View file

@@ -3,28 +3,34 @@
import re import re
from oxlib import cache from oxlib.cache import getUrlUnicode
from oxlib import findRe from oxlib import findRe
def getPosterUrls(imdbId): def getData(id):
url = 'http://www.movieposterdb.com/movie/%s' % imdbId data = {
posterUrls = [] "url": getUrl(id)
if cache.exists(url): }
posterUrls = parsePage(url) data["posters"] = getPostersByUrl(data["url"])
return posterUrls return data
def parsePage(url): def getId(url):
posterUrls = [] return url.split("/")[-2]
html = cache.getUrlUnicode(url, timeout=86400)
groups = re.compile('<a href="(http://www.movieposterdb.com/group/.*?)">', re.DOTALL).findall(html) def getPostersByUrl(url):
for group in groups: posters = []
posterUrls += parsePage(group) html = getUrlUnicode(url)
posters = re.compile('<a href="(http://www.movieposterdb.com/poster/.*?)">', re.DOTALL).findall(html) results = re.compile('<a href="(http://www.movieposterdb.com/group/.*?)">', re.DOTALL).findall(html)
for poster in posters: for result in results:
html = cache.getUrlUnicode(poster) posters += getPostersByUrl(result)
posterUrls.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"')) results = re.compile('<a href="(http://www.movieposterdb.com/poster/.*?)">', re.DOTALL).findall(html)
return posterUrls for result in results:
html = getUrlUnicode(result)
posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"'))
return posters
def getUrl(id):
return "http://www.movieposterdb.com/movie/%s/" % id
if __name__ == '__main__': if __name__ == '__main__':
print getPosterUrls('0133093') print getData('0060304')
print getPosterUrls('0060304') print getData('0133093')