wikipedia by amg_id, only one place to keep the criterion poster url
This commit is contained in:
parent
37bd88fa4a
commit
26f82c01b4
2 changed files with 15 additions and 2 deletions
|
@ -15,7 +15,7 @@ def getData(criterionId):
|
||||||
data = {}
|
data = {}
|
||||||
html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId)
|
html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % criterionId)
|
||||||
data['criterionId'] = criterionId
|
data['criterionId'] = criterionId
|
||||||
data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % criterionId
|
data['posterUrl'] = getPosterUrl(criterionId)
|
||||||
data['synopsis'] = stripTags(findRe(html, '<h3>Synopsis</h3>(.*?)</div>'))
|
data['synopsis'] = stripTags(findRe(html, '<h3>Synopsis</h3>(.*?)</div>'))
|
||||||
result = re.compile("<title>The Criterion Collection: (.*?) by (.*?)</title>").findall(html)
|
result = re.compile("<title>The Criterion Collection: (.*?) by (.*?)</title>").findall(html)
|
||||||
data['title'] = stripTags(result[0][0])
|
data['title'] = stripTags(result[0][0])
|
||||||
|
@ -27,6 +27,9 @@ def getCriterionIds():
|
||||||
html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine')
|
html = getUrlUnicode('http://criterion.com/asp/list.asp?sort=spine')
|
||||||
return re.compile('release.asp\?id=(.*?)"').findall(html)
|
return re.compile('release.asp\?id=(.*?)"').findall(html)
|
||||||
|
|
||||||
|
def getPosterUrl(criterionId):
    """Build the box-shot poster image URL for a Criterion release.

    The id is substituted into the site's ``full_boxshot`` image path; no
    network request is made here, so the URL is not checked for existence.
    """
    template = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg'
    return template % criterionId
|
||||||
|
|
||||||
def getMovieId(title = '', director = '', imdbId = ''):
|
def getMovieId(title = '', director = '', imdbId = ''):
|
||||||
if not imdbId:
|
if not imdbId:
|
||||||
imdbId = imdb.getMovieId(title, director)
|
imdbId = imdb.getMovieId(title, director)
|
||||||
|
@ -51,7 +54,7 @@ def getMovieData(title = '', director = '', imdbId = ''):
|
||||||
if id:
|
if id:
|
||||||
html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % id)
|
html = getUrlUnicode('http://criterion.com/asp/release.asp?id=%s' % id)
|
||||||
data['id'] = id
|
data['id'] = id
|
||||||
data['posterUrl'] = 'http://criterion.com/content/images/full_boxshot/%s_box_348x490.jpg' % id
|
data['posterUrl'] = getPosterUrl(id)
|
||||||
data['synopsis'] = stripTags(findRe(html, '<h3>Synopsis</h3>(.*?)</div>'))
|
data['synopsis'] = stripTags(findRe(html, '<h3>Synopsis</h3>(.*?)</div>'))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -24,11 +24,21 @@ def getUrlByImdb(imdbId):
|
||||||
return url
|
return url
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
def getUrlByAmbId(amg_id):
    """Look up a Wikipedia URL for the article that mentions the given amg_id.

    Runs a search (via ``find``) for the quoted phrase ``"amg_id = <id>"``
    and returns element ``[1]`` of the first hit, which the original code
    treats as the URL (NOTE(review): confirm against what ``find`` returns).
    Returns an empty string when the search yields nothing.
    """
    matches = find('"amg_id = %s"' % amg_id)
    if not matches:
        return ''
    # Only the top-ranked search hit is considered.
    return matches[0][1]
|
||||||
|
|
||||||
def find(query, max_results=10):
|
def find(query, max_results=10):
|
||||||
query = {'action': 'query', 'list':'search', 'format': 'json',
|
query = {'action': 'query', 'list':'search', 'format': 'json',
|
||||||
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
|
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
|
||||||
url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
|
url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
|
||||||
data = getUrl(url)
|
data = getUrl(url)
|
||||||
|
if not data:
|
||||||
|
data = getUrl(url, timeout=0)
|
||||||
result = simplejson.loads(data)
|
result = simplejson.loads(data)
|
||||||
results = []
|
results = []
|
||||||
for r in result['query']['search']:
|
for r in result['query']['search']:
|
||||||
|
|
Loading…
Reference in a new issue