diff --git a/ox/criterion.py b/ox/criterion.py index d1fcb10..1e71e5d 100644 --- a/ox/criterion.py +++ b/ox/criterion.py @@ -1,9 +1,31 @@ +# -*- coding: UTF-8 -*- +# vi:si:et:sw=4:sts=4:ts=4 import re import ox.imdb as imdb from oxutils.cache import getUrlUnicode from oxutils.html import stripTags -from oxutils.text import findRe +from oxutils.text import findRe, removeSpecialCharacters + +def getData(criterionId): + ''' + >>> getData(348)['imdbId'] + '0068205' + ''' + data = {} + html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId) + data['criterionId'] = criterionId + data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId + data['synopsis'] = stripTags(findRe(html, '

Synopsis

(.*?)')) + result = re.compile("The Criterion Collection: (.*?) by (.*?)").findall(html) + data['title'] = stripTags(result[0][0]) + data['director'] = stripTags(result[0][1]) + data['imdbId'] = imdb.getMovieId(data['title'], data['director']) + return data + +def getCriterionIds(): + html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine') + return re.compile('release.asp\?id=(.*?)"').findall(html) def getMovieId(title = '', director = '', imdbId = ''): if not imdbId: @@ -34,4 +56,4 @@ def getMovieData(title = '', director = '', imdbId = ''): return data if __name__ == '__main__': - print getMovieData('Le mepris', 'Jean-Luc Godard') \ No newline at end of file + print getMovieData('Le mepris', 'Jean-Luc Godard') diff --git a/ox/imdb.py b/ox/imdb.py index 6aa35ed..d0e6b70 100644 --- a/ox/imdb.py +++ b/ox/imdb.py @@ -112,11 +112,11 @@ def getMovieInfo(imdbId): title = title[1:-1] info['title'] = title info['year'] = year - ''' + #Rating - rating = findRe(data, '(.*?)/10') + rating = findRe(data, '([\d\.]*?)/10') if rating: - info['rating'] = int(float(rating) * 1000) + info['rating'] = float(rating) else: info['rating'] = -1 @@ -126,7 +126,6 @@ def getMovieInfo(imdbId): info['votes'] = int(votes.replace(',', '')) else: info['votes'] = -1 - ''' return info def getMoviePoster(imdbId):