ratings, and some more criterion functions

This commit is contained in:
j 2008-05-10 09:38:14 +02:00
parent ae6ab0d0a7
commit 2d13e2ba31
2 changed files with 27 additions and 6 deletions

View file

@ -1,9 +1,31 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re import re
import ox.imdb as imdb import ox.imdb as imdb
from oxutils.cache import getUrlUnicode from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags from oxutils.html import stripTags
from oxutils.text import findRe from oxutils.text import findRe, removeSpecialCharacters
def getData(criterionId):
    '''
    Collect metadata for a single Criterion Collection release.

    Downloads the release page for criterionId from criterion.com and
    returns a dict with the keys 'criterionId', 'posterUrl', 'synopsis',
    'title', 'director' and 'imdbId'.

    Raises IndexError when the page's title tag does not have the form
    "The Criterion Collection: TITLE by DIRECTOR".

    >>> getData(348)['imdbId']
    '0068205'
    '''
    releaseUrl = 'http://criterion.com/asp/release.asp?id=%s' % criterionId
    page = getUrlUnicode(releaseUrl)

    # The box-shot URL is derived from the id alone; nothing is fetched here.
    poster = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId

    # Text between the "Synopsis" heading and the next closing div, tags removed.
    synopsis = stripTags(findRe(page, '<h3>Synopsis</h3>(.*?)</div>'))

    # The page title carries both the film title and the director.
    matches = re.findall("<title>The Criterion Collection: (.*?) by (.*?)</title>", page)
    rawTitle, rawDirector = matches[0]
    title = stripTags(rawTitle)
    director = stripTags(rawDirector)

    return {
        'criterionId': criterionId,
        'posterUrl': poster,
        'synopsis': synopsis,
        'title': title,
        'director': director,
        # IMDb id is resolved from the scraped title/director pair.
        'imdbId': imdb.getMovieId(title, director),
    }
def getCriterionIds():
    '''
    Return the release ids linked from criterion.com's spine-sorted list page.

    Each occurrence of a release.asp?id=ID" link in the page contributes one
    entry, in page order. Ids are returned as the matched strings and may
    repeat if the page links the same release more than once.
    '''
    html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine')
    # Raw string so the backslashes reach the regex engine untouched (a plain
    # literal with "\?" is an invalid string escape); the dot is escaped so
    # only a literal "release.asp" matches rather than any character.
    return re.findall(r'release\.asp\?id=(.*?)"', html)
def getMovieId(title = '', director = '', imdbId = ''): def getMovieId(title = '', director = '', imdbId = ''):
if not imdbId: if not imdbId:
@ -34,4 +56,4 @@ def getMovieData(title = '', director = '', imdbId = ''):
return data return data
if __name__ == '__main__': if __name__ == '__main__':
print getMovieData('Le mepris', 'Jean-Luc Godard') print getMovieData('Le mepris', 'Jean-Luc Godard')

View file

@ -112,11 +112,11 @@ def getMovieInfo(imdbId):
title = title[1:-1] title = title[1:-1]
info['title'] = title info['title'] = title
info['year'] = year info['year'] = year
'''
#Rating #Rating
rating = findRe(data, '<b>(.*?)/10</b>') rating = findRe(data, '<b>([\d\.]*?)/10</b>')
if rating: if rating:
info['rating'] = int(float(rating) * 1000) info['rating'] = float(rating)
else: else:
info['rating'] = -1 info['rating'] = -1
@ -126,7 +126,6 @@ def getMovieInfo(imdbId):
info['votes'] = int(votes.replace(',', '')) info['votes'] = int(votes.replace(',', ''))
else: else:
info['votes'] = -1 info['votes'] = -1
'''
return info return info
def getMoviePoster(imdbId): def getMoviePoster(imdbId):