allmovie: parse runtime

This commit is contained in:
Rolux 2008-07-25 16:43:45 +02:00
parent aee1085e35
commit 8c9647c28a

View file

@ -11,6 +11,7 @@ def getMovieData(allmovieId):
data = {}
data['poster'] = findRe(html, '<img src="(http://image.*?)"')
data['year'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&amp;sql=24:\d{4}">(\d{4})</a>')
data['runtime'] = findRe(html, 'sql=24:\d{4}">.*?">(.*?)min.</td></tr></table>').strip()
data['rating'] = findRe(html, '" alt="(\d+?) Stars"')
data['country'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
data['director'] = stripTags(findRe(html, '<td class="formed-sub"><a href="/cg/avg.dll\?p=avg&sql=2:\d+">(.*?)</td>')).split(', ')
@ -55,5 +56,5 @@ def parseList(html, title):
return map(lambda x: stripTags(x), findRe(html, '<span>%s</span>(.*?)</table>' % title).split('</li><li>'))
if __name__ == '__main__':
print getMovieData('177524')
print getMovieData('177524')['runtime']