diff --git a/ox/criterion.py b/ox/criterion.py index 1e71e5d..4ba4fe6 100644 --- a/ox/criterion.py +++ b/ox/criterion.py @@ -15,7 +15,7 @@ def getData(criterionId): data = {} html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId) data['criterionId'] = criterionId - data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId + data['posterUrl'] = getPosterUrl(criterionId) data['synopsis'] = stripTags(findRe(html, '

Synopsis

(.*?)')) result = re.compile("The Criterion Collection: (.*?) by (.*?)").findall(html) data['title'] = stripTags(result[0][0]) @@ -27,6 +27,9 @@ def getCriterionIds(): html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine') return re.compile('release.asp\?id=(.*?)"').findall(html) +def getPosterUrl(criterionId): + return 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId + def getMovieId(title = '', director = '', imdbId = ''): if not imdbId: imdbId = imdb.getMovieId(title, director) @@ -51,7 +54,7 @@ def getMovieData(title = '', director = '', imdbId = ''): if id: html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % id) data['id'] = id - data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % id + data['posterUrl'] = getPosterUrl(id) data['synopsis'] = stripTags(findRe(html, '

Synopsis

(.*?)')) return data diff --git a/ox/wikipedia.py b/ox/wikipedia.py index f433ba0..b7d591a 100644 --- a/ox/wikipedia.py +++ b/ox/wikipedia.py @@ -24,11 +24,21 @@ def getUrlByImdb(imdbId): return url return '' +def getUrlByAmbId(amg_id): + query = '"amg_id = %s"'% amg_id + result = find(query) + if result: + url = result[0][1] + return url + return '' + def find(query, max_results=10): query = {'action': 'query', 'list':'search', 'format': 'json', 'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')} url = "http://en.wikipedia.org/w/api.php?" + urlencode(query) data = getUrl(url) + if not data: + data = getUrl(url, timeout=0) result = simplejson.loads(data) results = [] for r in result['query']['search']: