allmovie: more data
This commit is contained in:
parent
314fd03934
commit
aee1085e35
1 changed file with 8 additions and 1 deletion
|
@ -10,7 +10,6 @@ def getMovieData(allmovieId):
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T0" % allmovieId)
|
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T0" % allmovieId)
|
||||||
data = {}
|
data = {}
|
||||||
data['poster'] = findRe(html, '<img src="(http://image.*?)"')
|
data['poster'] = findRe(html, '<img src="(http://image.*?)"')
|
||||||
data['synopsis'] = stripTags(findRe(html, 'Plot Synopsis</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
|
||||||
data['year'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:\d{4}">(\d{4})</a>')
|
data['year'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:\d{4}">(\d{4})</a>')
|
||||||
data['rating'] = findRe(html, '" alt="(\d+?) Stars"')
|
data['rating'] = findRe(html, '" alt="(\d+?) Stars"')
|
||||||
data['country'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
|
data['country'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
|
||||||
|
@ -18,6 +17,11 @@ def getMovieData(allmovieId):
|
||||||
data['genre'] = parseList(html, 'Genre / Type')
|
data['genre'] = parseList(html, 'Genre / Type')
|
||||||
data['keywords'] = parseList(html, 'Keywords')
|
data['keywords'] = parseList(html, 'Keywords')
|
||||||
data['themes'] = parseList(html, 'Themes')
|
data['themes'] = parseList(html, 'Themes')
|
||||||
|
data['boxoffice'] = parseEntry(html, 'Box office')[1:].replace(',', '')
|
||||||
|
data['produced'] = parseEntry(html, 'Produced by')
|
||||||
|
data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
|
||||||
|
data['released'] = parseEntry(html, 'Released by')
|
||||||
|
data['synopsis'] = stripTags(findRe(html, 'Plot Synopsis</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T1" % allmovieId)
|
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T1" % allmovieId)
|
||||||
data['review'] = stripTags(findRe(html, 'Review</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
data['review'] = stripTags(findRe(html, 'Review</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T2" % allmovieId)
|
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T2" % allmovieId)
|
||||||
|
@ -44,6 +48,9 @@ def getMoviePoster(allmovieId):
|
||||||
return data['poster']
|
return data['poster']
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
def parseEntry(html, title):
    """Return the tag-stripped text of the page section labelled *title*.

    The section is whatever the pattern captures between
    ``<span>title</span>`` and the next ``</table>``.

    NOTE(review): *title* is interpolated into the regular expression
    unescaped, so regex metacharacters in it are interpreted as pattern
    syntax. ``findRe`` and ``stripTags`` are defined outside this block;
    presumably ``findRe`` returns the first captured group as a string --
    confirm against their definitions.
    """
    pattern = '<span>%s</span>(.*?)</table>' % title
    section = findRe(html, pattern)
    return stripTags(section)
|
||||||
|
|
||||||
def parseList(html, title):
    """Return the tag-stripped items of the list section labelled *title*.

    The text captured between ``<span>title</span>`` and the next
    ``</table>`` is split on ``</li><li>`` and each piece is passed through
    ``stripTags``.

    NOTE(review): *title* is interpolated into the regular expression
    unescaped. ``findRe`` and ``stripTags`` are defined outside this block;
    the ``.split`` call assumes ``findRe`` returns a string -- confirm.
    """
    pattern = '<span>%s</span>(.*?)</table>' % title
    items = findRe(html, pattern).split('</li><li>')
    # map() is kept (rather than a comprehension) so the return type is the
    # same as before on whichever Python version runs this module.
    return map(stripTags, items)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue