diff --git a/ox/criterion.py b/ox/criterion.py
index d1fcb10..1e71e5d 100644
--- a/ox/criterion.py
+++ b/ox/criterion.py
@@ -1,9 +1,31 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
import re
import ox.imdb as imdb
from oxutils.cache import getUrlUnicode
from oxutils.html import stripTags
-from oxutils.text import findRe
+from oxutils.text import findRe, removeSpecialCharacters
+
+def getData(criterionId):
+    '''Fetch the criterion.com release page for criterionId and return its metadata as a dict.
+    >>> getData(348)['imdbId']
+    '0068205'
+    '''
+    data = {}
+    html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId)
+    data['criterionId'] = criterionId
+    data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
+    data['synopsis'] = stripTags(findRe(html, 'Synopsis(.*?)'))  # NOTE(review): pattern ends in a lazy group, which matches '' as written - a closing delimiter looks lost from this copy; confirm
+    result = re.compile("The Criterion Collection: (.*?) by (.*?)").findall(html)  # NOTE(review): same here - the second group can only match '' without a terminator; confirm
+    data['title'] = stripTags(result[0][0])  # result[0] raises IndexError when the page has no match
+    data['director'] = stripTags(result[0][1])
+    data['imdbId'] = imdb.getMovieId(data['title'], data['director'])
+    return data
+
+def getCriterionIds():  # returns the list of id strings captured from every release.asp?id=..." occurrence on the spine-sorted list page
+    html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine')
+    return re.compile(r'release\.asp\?id=(.*?)"').findall(html)
def getMovieId(title = '', director = '', imdbId = ''):
if not imdbId:
@@ -34,4 +56,4 @@ def getMovieData(title = '', director = '', imdbId = ''):
return data
if __name__ == '__main__':
- print getMovieData('Le mepris', 'Jean-Luc Godard')
\ No newline at end of file
+ print getMovieData('Le mepris', 'Jean-Luc Godard')
diff --git a/ox/imdb.py b/ox/imdb.py
index 6aa35ed..d0e6b70 100644
--- a/ox/imdb.py
+++ b/ox/imdb.py
@@ -112,11 +112,11 @@ def getMovieInfo(imdbId):
title = title[1:-1]
info['title'] = title
info['year'] = year
- '''
+
#Rating
- rating = findRe(data, '(.*?)/10')
+ rating = findRe(data, '([\d\.]*?)/10')
if rating:
- info['rating'] = int(float(rating) * 1000)
+ info['rating'] = float(rating)
else:
info['rating'] = -1
@@ -126,7 +126,6 @@ def getMovieInfo(imdbId):
info['votes'] = int(votes.replace(',', ''))
else:
info['votes'] = -1
- '''
return info
def getMoviePoster(imdbId):