updating allmovie module
This commit is contained in:
parent
acb0e3f0e8
commit
e34f222e54
1 changed file with 49 additions and 41 deletions
|
@ -6,55 +6,63 @@ import time
|
||||||
from oxlib import stripTags, findRe
|
from oxlib import stripTags, findRe
|
||||||
from oxlib.cache import getUrlUnicode
|
from oxlib.cache import getUrlUnicode
|
||||||
|
|
||||||
def getMovieData(allmovieId):
|
|
||||||
data = {}
|
def getId(url):
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T0" % allmovieId)
|
return url.split("/")[-2]
|
||||||
data['poster'] = findRe(html, '<img src="(http://image.*?)"')
|
|
||||||
data['year'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:\d{4}">(\d{4})</a>')
|
def getData(id):
|
||||||
data['runtime'] = findRe(html, 'sql=24:\d{4}">.*?">(.*?)min.</td></tr></table>').strip()
|
data = {
|
||||||
data['rating'] = findRe(html, '" alt="(\d+?) Stars"')
|
"url": getUrl(id)
|
||||||
data['country'] = findRe(html, '<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
|
}
|
||||||
data['director'] = stripTags(findRe(html, '<td class="formed-sub"><a href="/cg/avg.dll\?p=avg&sql=2:\d+">(.*?)</td>')).split(', ')
|
html = getUrlUnicode(data["url"])
|
||||||
data['genre'] = parseList(html, 'Genre / Type')
|
data['aka'] = parseList(html, 'AKA')
|
||||||
|
data['category'] = findRe(html, 'http://allmovie.com/explore/category/.*?">(.*?)</a>')
|
||||||
|
data['countries'] = parseList(html, 'Countries')
|
||||||
|
data['director'] = parseEntry(html, 'Director')
|
||||||
|
data['genres'] = parseList(html, 'Genres')
|
||||||
data['keywords'] = parseList(html, 'Keywords')
|
data['keywords'] = parseList(html, 'Keywords')
|
||||||
data['themes'] = parseList(html, 'Themes')
|
data['posters'] = [findRe(html, '<img src="(http://image\..*?)"')]
|
||||||
data['boxoffice'] = parseEntry(html, 'Box office')[1:].replace(',', '')
|
data['produced'] = parseList(html, 'Produced by')
|
||||||
data['produced'] = parseEntry(html, 'Produced by')
|
data['rating'] = findRe(html, 'Stars" title="(.*?) Stars"')
|
||||||
data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
|
|
||||||
data['released'] = parseEntry(html, 'Released by')
|
data['released'] = parseEntry(html, 'Released by')
|
||||||
data['synopsis'] = stripTags(findRe(html, 'Plot Synopsis</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T1" % allmovieId)
|
data['runtime'] = findRe(html, '<td class="formed-sub" style="width: 86px;">(\d+) min.</td>')
|
||||||
data['review'] = stripTags(findRe(html, 'Review</td>.*?<td colspan="2"><p>(.*?)</td>'))
|
data['set'] = parseEntry(html, 'Set In')
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T2" % allmovieId)
|
data['synopsis'] = parseText(html, 'Plot Synopsis')
|
||||||
data['cast'] = map(
|
data['themes'] = parseList(html, 'Themes')
|
||||||
lambda x: map(
|
data['types'] = parseList(html, 'Types')
|
||||||
lambda x: stripTags(x).strip(),
|
data['year'] = findRe(html, '"http://allmovie.com/explore/year/(.*?)"')
|
||||||
x.split(' </td><td width="305"><i>-')
|
html = getUrlUnicode("http://allmovie.com/work/%s/cast" % id)
|
||||||
),
|
data['cast'] = parseTable(html)
|
||||||
findRe(html, '<div id="results-table">(.*?)</table>').split('</td></tr>')[:-1]
|
html = getUrlUnicode("http://allmovie.com/work/%s/credits" % id)
|
||||||
)
|
data['credits'] = parseTable(html)
|
||||||
html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T3" % allmovieId)
|
html = getUrlUnicode("http://allmovie.com/work/%s/review" % id)
|
||||||
data['credits'] = map(
|
data['review'] = parseText(html, 'Review')
|
||||||
lambda x: map(
|
|
||||||
lambda x: stripTags(x).strip(),
|
|
||||||
x.split(' </TD><TD WIDTH=305><I>-')
|
|
||||||
),
|
|
||||||
findRe(html, '<div id="results-table">(.*?)</table>').split('</TD></TR>')[:-1]
|
|
||||||
)
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def getMoviePoster(allmovieId):
|
def getUrl(id):
|
||||||
data = getMovieData(allmovieId)
|
return "http://allmovie.com/work/%s/" % id
|
||||||
if data:
|
|
||||||
return data['poster']
|
|
||||||
return ''
|
|
||||||
|
|
||||||
def parseEntry(html, title):
|
def parseEntry(html, title):
|
||||||
return stripTags(findRe(html, '<span>%s</span>(.*?)</table>' % title))
|
return stripTags(findRe(html, '<span>%s</span>(.*?)</table>' % title)).strip()
|
||||||
|
|
||||||
def parseList(html, title):
|
def parseList(html, title):
|
||||||
return map(lambda x: stripTags(x), findRe(html, '<span>%s</span>(.*?)</table>' % title).split('</li><li>'))
|
html = findRe(html, '<span>%s</span>(.*?)</table>' % title)
|
||||||
|
return map(lambda x: stripTags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html))
|
||||||
|
|
||||||
|
def parseTable(html):
    """Turn the page's results table into nested row/cell sequences.

    The text between '<div id="results-table">' and the next '</table>'
    is pulled out with findRe (an oxlib helper; presumably it returns the
    captured group -- confirm against oxlib). That text is cut into rows on
    '</tr>', and the piece after the last '</tr>' is dropped. Each row is
    then cut into cells on '<td width="305">-'.

    Returns the result of map(): one entry per row, each entry itself a
    map() over that row's cleaned cell strings.
    """
    def cleanCell(cell):
        # Remove markup, trim the ends, then delete every occurrence of the
        # literal passed to replace().
        # NOTE(review): that literal may have been an HTML entity before this
        # text was rendered -- confirm against the upstream file.
        return stripTags(cell).strip().replace(' ', '')

    def splitRow(row):
        return map(cleanCell, row.split('<td width="305">-'))

    table = findRe(html, '<div id="results-table">(.*?)</table>')
    # The final element after the last '</tr>' is trailing residue, not a row.
    rows = table.split('</tr>')[:-1]
    return map(splitRow, rows)
|
||||||
|
|
||||||
|
def parseText(html, title):
    """Return the tag-free, whitespace-trimmed text block labelled *title*.

    *title* is interpolated unescaped into a pattern that looks for
    '<title></td>', skips ahead to the next '<td colspan="2"><p>', and
    captures everything up to the following '</td>'. The capture is obtained
    through findRe (oxlib helper; presumably returns the captured group --
    confirm), passed through stripTags, and stripped of surrounding
    whitespace.
    """
    pattern = '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title
    fragment = findRe(html, pattern)
    text = stripTags(fragment)
    return text.strip()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print getMovieData('177524')
|
print getData('129689')
|
||||||
|
# print getData('177524')
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue