ratings, and some more criterion functions

This commit is contained in:
j 2008-05-10 09:38:14 +02:00
parent ae6ab0d0a7
commit 2d13e2ba31
2 changed files with 27 additions and 6 deletions

View file

@ -1,9 +1,31 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import ox.imdb as imdb
from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags
from oxutils.text import findRe
from oxutils.text import findRe, removeSpecialCharacters
def getData(criterionId):
    '''
    Scrape the criterion.com release page for criterionId into a dict.

    The returned dict has the keys 'criterionId', 'posterUrl', 'synopsis',
    'title', 'director' and 'imdbId'. Title and director are parsed from
    the page's <title> element; 'imdbId' is whatever imdb.getMovieId
    returns for that title/director pair.

    Raises IndexError if the page <title> does not have the expected
    "The Criterion Collection: <title> by <director>" form.

    >>> getData(348)['imdbId']
    '0068205'
    '''
    html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId)

    # The first group is greedy on purpose: the split must happen on the
    # LAST " by ", otherwise a title that itself contains " by " is cut
    # short and the remainder ends up in the director field.
    matches = re.findall("<title>The Criterion Collection: (.*) by (.*?)</title>", html)
    rawTitle, rawDirector = matches[0]

    data = {}
    data['criterionId'] = criterionId
    data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
    # Text between the Synopsis heading and the next closing </div>.
    data['synopsis'] = stripTags(findRe(html, '<h3>Synopsis</h3>(.*?)</div>'))
    data['title'] = stripTags(rawTitle)
    data['director'] = stripTags(rawDirector)
    data['imdbId'] = imdb.getMovieId(data['title'], data['director'])
    return data
def getCriterionIds():
    '''
    Return the release ids linked from the criterion.com spine-sorted list.

    Each id is the text captured between "release.asp?id=" and the next
    double quote, returned as a list of strings in page order.
    '''
    html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine')
    # Raw string so that \. and \? reach the regex engine as escapes
    # instead of being (invalid) string-literal escapes; the dot is
    # escaped so it only matches a literal "." in "release.asp".
    return re.findall(r'release\.asp\?id=(.*?)"', html)
def getMovieId(title = '', director = '', imdbId = ''):
if not imdbId:
@ -34,4 +56,4 @@ def getMovieData(title = '', director = '', imdbId = ''):
return data
if __name__ == '__main__':
print getMovieData('Le mepris', 'Jean-Luc Godard')
print getMovieData('Le mepris', 'Jean-Luc Godard')

View file

@ -112,11 +112,11 @@ def getMovieInfo(imdbId):
title = title[1:-1]
info['title'] = title
info['year'] = year
'''
#Rating
rating = findRe(data, '<b>(.*?)/10</b>')
rating = findRe(data, '<b>([\d\.]*?)/10</b>')
if rating:
info['rating'] = int(float(rating) * 1000)
info['rating'] = float(rating)
else:
info['rating'] = -1
@ -126,7 +126,6 @@ def getMovieInfo(imdbId):
info['votes'] = int(votes.replace(',', ''))
else:
info['votes'] = -1
'''
return info
def getMoviePoster(imdbId):