# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time

from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode


# NOTE(review): several string literals in this module (the patterns that end
# in a bare '(.*?)', the '' row separator and the bullet list separator) look
# as if markup was lost from them before this file reached review. They are
# reproduced exactly as found; confirm them against the upstream source or
# the live page markup before relying on the scraped values.

# One page per tab; the tab number is the digit after "~T".
_PAGE_URL = "http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T%d"


def _getPage(allmovieId, tab):
    """Fetch tab number `tab` of the allmovie page for `allmovieId`."""
    return getUrlUnicode(_PAGE_URL % (allmovieId, tab))


def _parseCreditRows(html, pattern):
    """Return a list of rows, each a list of tag-stripped, trimmed fields.

    The text captured by `pattern` is cut into rows, the last piece is
    dropped, and every row is cut into fields on ' -'.
    """
    block = findRe(html, pattern)
    # NOTE(review): the row separator is an empty string as found, and
    # str.split('') raises ValueError("empty separator"). The real separator
    # has to be restored before this helper can run.
    rows = block.split('')[:-1]
    return [
        [stripTags(field).strip() for field in row.split(' -')]
        for row in rows
    ]


def getMovieData(allmovieId):
    """Scrape the allmovie.com pages (tabs T0 to T3) for one movie.

    Returns a dict with the keys 'poster', 'runtime', 'rating', 'country',
    'genre', 'keywords', 'themes', 'boxoffice', 'produced', 'releasedate',
    'released', 'synopsis', 'review', 'cast' and 'credits'.
    """
    data = {}

    # Tab 0: overview page.
    html = _getPage(allmovieId, 0)
    data['poster'] = findRe(html, r'(\d{4})')
    data['runtime'] = findRe(html, r'sql=24:\d{4}">.*?">(.*?)min.').strip()
    data['rating'] = findRe(html, r'" alt="(\d+?) Stars"')
    # NOTE(review): as found, this expression had one closing parenthesis too
    # many. It is wrapped in stripTags() to match the stripTags(findRe(...))
    # form used for the other fields; confirm that was the intent.
    data['country'] = stripTags(findRe(html, '(.*?)(.*?)')).split(', ')
    data['genre'] = parseList(html, 'Genre / Type')
    data['keywords'] = parseList(html, 'Keywords')
    data['themes'] = parseList(html, 'Themes')
    # Drop the first character of the entry, then the thousands separators.
    data['boxoffice'] = parseEntry(html, 'Box office')[1:].replace(',', '')
    data['produced'] = parseEntry(html, 'Produced by')
    # Keep the first ten characters and join them with dashes.
    data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
    data['released'] = parseEntry(html, 'Released by')
    data['synopsis'] = stripTags(findRe(html, 'Plot Synopsis.*?\n\n(.*?)'))

    # Tab 1: review page.
    html = _getPage(allmovieId, 1)
    data['review'] = stripTags(findRe(html, 'Review.*?\n\n(.*?)'))

    # Tab 2: cast page.
    html = _getPage(allmovieId, 2)
    data['cast'] = _parseCreditRows(html, '\n\n(.*?)')

    # Tab 3: credits page.
    html = _getPage(allmovieId, 3)
    data['credits'] = _parseCreditRows(html, '\n(.*?)')

    return data


def getMoviePoster(allmovieId):
    """Return the 'poster' value scraped for `allmovieId`, or '' if no data."""
    data = getMovieData(allmovieId)
    if data:
        return data['poster']
    return ''


def parseEntry(html, title):
    """Return the tag-stripped text captured after `title` in `html`."""
    return stripTags(findRe(html, '%s(.*?)' % title))


def parseList(html, title):
    """Return the tag-stripped items of the list captured after `title`."""
    found = findRe(html, '%s(.*?)' % title)
    # NOTE(review): the separator below is reproduced as found; it looks like
    # a rendered bullet rather than page markup, so confirm it.
    return [stripTags(item) for item in found.split('\n  • ')]


if __name__ == '__main__':
    # Parenthesised form prints the same value under Python 2's print statement.
    print(getMovieData('177524'))