# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode
def _fetchPage(allmovieId, tab):
    """Fetch tab number `tab` of the allmovie.com entry for `allmovieId`.

    The page is retrieved through oxlib.cache.getUrlUnicode and returned
    as-is.
    """
    url = "http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T%d" % (allmovieId, tab)
    return getUrlUnicode(url)


def _parseRows(html):
    """Return the rows/cells list used for the 'cast' and 'credits' entries.

    The text extracted by findRe is split on ' | ' (the last piece is
    dropped) and every piece is split on ' -'; each resulting cell has its
    tags stripped and surrounding whitespace removed.
    """
    # NOTE(review): the pattern ' (.*?)' and the ' | ' / ' -' separators look
    # as if HTML markup was stripped from them -- confirm against upstream.
    rows = findRe(html, ' (.*?)').split(' | ')[:-1]
    return [[stripTags(cell).strip() for cell in row.split(' -')] for row in rows]


def getMovieData(allmovieId):
    """Scrape allmovie.com for `allmovieId` and return the result as a dict.

    Four tabs of the movie page (T0..T3) are fetched in turn.  The returned
    dict has the keys 'poster', 'runtime', 'rating', 'country', 'director',
    'genre', 'keywords', 'themes', 'boxoffice', 'produced', 'releasedate',
    'released', 'synopsis', 'review', 'cast' and 'credits'.

    'director', 'genre', 'keywords' and 'themes' come from split()/parseList;
    'cast' and 'credits' are lists of lists of strings; the remaining values
    are whatever findRe / parseEntry return after the post-processing below.

    NOTE(review): several patterns in this function ('(.*?)', ' | ', the
    bare '(\\d{4})' used for 'poster') look as if HTML markup was removed
    from them; they are kept verbatim here and should be checked against
    the upstream scraper before the extracted values are trusted.
    """
    data = {}

    # Tab 0: overview page.
    html = _fetchPage(allmovieId, 0)
    data['poster'] = findRe(html, r'(\d{4})')
    data['runtime'] = findRe(html, r'sql=24:\d{4}">.*?">(.*?)min.').strip()
    data['rating'] = findRe(html, r'" alt="(\d+?) Stars"')
    data['country'] = findRe(html, '(.*?)')
    # This pattern was split across two physical lines inside a single-quoted
    # literal (a syntax error); the line break is now an explicit escape.
    data['director'] = stripTags(findRe(html, '\n(.*?) | ')).split(', ')
    data['genre'] = parseList(html, 'Genre / Type')
    data['keywords'] = parseList(html, 'Keywords')
    data['themes'] = parseList(html, 'Themes')
    # [1:] drops the first character (presumably a currency sign -- confirm),
    # then thousands separators are removed.
    data['boxoffice'] = parseEntry(html, 'Box office')[1:].replace(',', '')
    data['produced'] = parseEntry(html, 'Produced by')
    # Keep the first ten characters and turn spaces into dashes.
    data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
    data['released'] = parseEntry(html, 'Released by')
    data['synopsis'] = stripTags(findRe(html, 'Plot Synopsis.*?(.*?) | '))

    # Tab 1: review page.
    html = _fetchPage(allmovieId, 1)
    data['review'] = stripTags(findRe(html, 'Review.*?(.*?) | '))

    # Tab 2: cast page.
    html = _fetchPage(allmovieId, 2)
    data['cast'] = _parseRows(html)

    # Tab 3: production credits page.
    html = _fetchPage(allmovieId, 3)
    data['credits'] = _parseRows(html)

    return data
def getMoviePoster(allmovieId):
    """Return the 'poster' value scraped for `allmovieId`.

    Delegates to getMovieData; an empty string is returned when that call
    yields a falsy result.
    """
    movie = getMovieData(allmovieId)
    return movie['poster'] if movie else ''
def parseEntry(html, title):
    """Extract the entry that follows `title` in `html`, with tags stripped.

    `title` is interpolated into the regular expression unescaped.
    """
    # NOTE(review): '%s(.*?)' looks as if surrounding markup was stripped
    # from the pattern -- confirm against upstream.
    pattern = '%s(.*?)' % title
    found = findRe(html, pattern)
    return stripTags(found)
def parseList(html, title):
    """Extract the list that follows `title` in `html`, tags stripped per item.

    `title` is interpolated into the regular expression unescaped.
    """
    pattern = '%s(.*?)' % title
    # NOTE(review): split('') raises ValueError (empty separator); the real
    # separator appears to have been lost from this literal -- restore it
    # from upstream before using this function.
    pieces = findRe(html, pattern).split('')
    return map(stripTags, pieces)
if __name__ == '__main__':
    # Manual smoke test: scrape one fixed allmovie id and dump the result.
    # Parenthesised single-argument form prints the same on Python 2 and
    # also parses on Python 3, unlike the bare print statement.
    print(getMovieData('177524'))