Fix recursion; make sure that the result is not a redirect

This commit is contained in:
j 2009-08-20 20:29:34 +02:00
parent 1ef34da4d1
commit 7df4cc4fa7

View file

@ -10,6 +10,8 @@ def getData(id):
'''
>>> getData('0060304')['posters'][0]
u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
>>> getData('0123456')['posters']
[]
'''
data = {
"url": getUrl(id)
@ -20,17 +22,18 @@ def getData(id):
# Return the second-to-last "/"-separated component of url.
def getId(url):
return url.split("/")[-2]
# NOTE(review): this span looks like a rendered diff with the +/- markers and
# indentation stripped, so the removed and the added version of a line sit next
# to each other (e.g. the two def lines below). It is not runnable as shown.
#
# Collect poster image URLs starting from the page at url and return them as a
# list. The variant with group=True also follows "/group/" links found on the
# page; the recursive call passes False so that the nested call skips the
# group-link scan.
def getPostersByUrl(url):
def getPostersByUrl(url, group=True):
posters = []
html = getUrlUnicode(url)
# Pages carrying the site's "not found" message contribute no posters.
if not "The movie you requested is not found" in html:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.*?)">', re.DOTALL).findall(html)
# Only proceed when the fetched page mentions the requested url; presumably this
# is the "result is not redirect" check from the commit message -- TODO confirm.
if url in html:
if group:
# This variant requires at least one character after "group/" and tolerates an
# optional "?" before the closing quote of the href.
results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
for result in results:
posters += getPostersByUrl(result)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.*?)">', re.DOTALL).findall(html)
posters += getPostersByUrl(result, False)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
# Each "/poster/" page is fetched and searched for the .jpg address under "/posters/".
for result in results:
html = getUrlUnicode(result)
# findRe is defined elsewhere; presumably it returns the first captured match -- verify.
posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.*?\.jpg)"'))
posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
return posters
def getUrl(id):