allmovie: parseList function

This commit is contained in:
Rolux 2008-07-25 16:06:50 +02:00
parent 392ca2939b
commit d963f1888a
1 changed file with 6 additions and 3 deletions

View File

@ -15,9 +15,9 @@ def getMovieData(allmovieId):
data['rating'] = findRe(html, '" alt="(\d+?) Stars"')
data['country'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
data['director'] = stripTags(findRe(html, '<td class="formed-sub"><a href="/cg/avg.dll\?p=avg&sql=2:\d+">(.*?)</td>')).split(', ')
data['genre'] = map(lambda x: stripTags(x), findRe(html, '<span>Genre / Type</span>(.*?)</table>').split('</li><li>'))
data['keywords'] = map(lambda x: stripTags(x), findRe(html, '<span>Keywords</span>(.*?)</table>').split('</li><li>'))
data['themes'] = map(lambda x: stripTags(x), findRe(html, '<span>Themes</span>(.*?)</table>').split('</li><li>'))
data['genre'] = parseList(html, 'Genre / Type')
data['keywords'] = parseList(html, 'Keywords')
data['themes'] = parseList(html, 'Themes')
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T1" % allmovieId)
data['review'] = stripTags(findRe(html, 'Review</td>.*?<td colspan="2"><p>(.*?)</td>'))
return data
@ -28,6 +28,9 @@ def getMoviePoster(allmovieId):
return data['poster']
return ''
def parseList(html, title):
    """Return the tag-stripped entries of the list section labelled `title`.

    Builds the pattern ``<span>TITLE</span>(.*?)</table>``, hands it to
    findRe together with `html`, splits the result on ``</li><li>`` and
    runs every piece through stripTags.
    """
    # NOTE: `title` is interpolated into the pattern unescaped, so any regex
    # metacharacters it contains are interpreted by findRe as pattern syntax.
    pattern = '<span>%s</span>(.*?)</table>' % title
    entries = findRe(html, pattern).split('</li><li>')
    return map(stripTags, entries)
if __name__ == '__main__':
    # Manual smoke check when run as a script: print the data parsed for
    # one sample allmovie id.
    print(getMovieData('177524'))