Fix recursion; make sure that the result is not a redirect

This commit is contained in:
j 2009-08-20 20:29:34 +02:00
parent 1ef34da4d1
commit 7df4cc4fa7

View file

@@ -10,6 +10,8 @@ def getData(id):
''' '''
>>> getData('0060304')['posters'][0] >>> getData('0060304')['posters'][0]
u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg' u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
>>> getData('0123456')['posters']
[]
''' '''
data = { data = {
"url": getUrl(id) "url": getUrl(id)
@@ -20,17 +22,18 @@ def getData(id):
def getId(url): def getId(url):
return url.split("/")[-2] return url.split("/")[-2]
def getPostersByUrl(url): def getPostersByUrl(url, group=True):
posters = [] posters = []
html = getUrlUnicode(url) html = getUrlUnicode(url)
if not "The movie you requested is not found" in html: if url in html:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.*?)">', re.DOTALL).findall(html) if group:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
for result in results: for result in results:
posters += getPostersByUrl(result) posters += getPostersByUrl(result, False)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.*?)">', re.DOTALL).findall(html) results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
for result in results: for result in results:
html = getUrlUnicode(result) html = getUrlUnicode(result)
posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"')) posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
return posters return posters
def getUrl(id): def getUrl(id):