python-oxweb/oxweb/allmovie.py

34 lines
1.5 KiB
Python
Raw Normal View History

2008-07-04 13:56:02 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode
def _findList(html, label):
    """Return the tag-stripped entries of the list section titled *label*.

    The section is whatever findRe captures between ``<span>label</span>``
    and the next ``</table>``; it is split on ``</li><li>`` and every piece
    is passed through stripTags.
    """
    section = findRe(html, '<span>%s</span>(.*?)</table>' % label)
    # A list comprehension (not map) so the result is a real list on both
    # Python 2 and Python 3.
    return [stripTags(item) for item in section.split('</li><li>')]


def getMovieData(allmovieId):
    """Scrape allmovie.com for the movie identified by *allmovieId*.

    Two pages are fetched through getUrlUnicode: the ``~T0`` page, which
    supplies every field except the review, and the ``~T1`` page, which
    supplies only the review.

    Returns a dict with the keys 'poster', 'synopsis', 'year', 'rating',
    'country', 'director', 'genre', 'keywords', 'themes' and 'review'.
    'director' is the result of splitting on ', '; 'genre', 'keywords' and
    'themes' are lists of tag-stripped entries; the remaining values are
    whatever findRe / stripTags return for the pattern.

    NOTE(review): findRe presumably returns the first captured group, or an
    empty value when nothing matches -- confirm against oxlib.
    """
    html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T0" % allmovieId)
    data = {}
    # Patterns are raw strings so that \? \d \| reach the regex engine
    # without relying on Python's handling of unknown string escapes.
    data['poster'] = findRe(html, r'<img src="(http://image.*?)"')
    data['synopsis'] = stripTags(findRe(html, r'Plot Synopsis</td>.*?<td colspan="2"><p>(.*?)</td>'))
    # NOTE(review): this pattern expects an HTML-escaped '&amp;' in the link,
    # while the country and director patterns below expect a bare '&'.
    # Verify against the live markup which form each link really uses.
    data['year'] = findRe(html, r'<a href="/cg/avg.dll\?p=avg&amp;sql=24:\d{4}">(\d{4})</a>')
    data['rating'] = findRe(html, r'" alt="(\d+?) Stars"')
    data['country'] = findRe(html, r'<a href="/cg/avg.dll\?p=avg&sql=24:D\|\|\|206">(.*?)</')
    director = findRe(html, r'<td class="formed-sub"><a href="/cg/avg.dll\?p=avg&sql=2:\d+">(.*?)</td>')
    data['director'] = stripTags(director).split(', ')
    data['genre'] = _findList(html, 'Genre / Type')
    data['keywords'] = _findList(html, 'Keywords')
    data['themes'] = _findList(html, 'Themes')

    # The review lives on a separate page of the same movie.
    html = getUrlUnicode("http://www.allmovie.com/cg/avg.dll?p=avg&sql=1:%s~T1" % allmovieId)
    data['review'] = stripTags(findRe(html, r'Review</td>.*?<td colspan="2"><p>(.*?)</td>'))
    return data
2008-07-04 13:56:02 +00:00
def getMoviePoster(allmovieId):
    """Return the 'poster' entry of getMovieData(allmovieId).

    Falls back to an empty string when getMovieData returns a falsy value.
    """
    movie = getMovieData(allmovieId)
    return movie['poster'] if movie else ''
if __name__ == '__main__':
    # Manual smoke test: fetch and dump the data for a hard-coded movie id.
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(getMovieData('177524'))