Compare commits
2 commits
17deba03f2...7041d1b316
| Author | SHA1 | Date | |
|---|---|---|---|
| | 7041d1b316 | | |
| | d21b0b675c | | |
2 changed files with 14 additions and 5 deletions
|
|
@@ -14,7 +14,7 @@ def get_id(url):
|
||||||
return url.split("/")[-1]
|
return url.split("/")[-1]
|
||||||
|
|
||||||
def get_url(id):
|
def get_url(id):
|
||||||
return "http://www.criterion.com/films/%s" % id
|
return "https://www.criterion.com/films/%s" % id
|
||||||
|
|
||||||
def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
||||||
'''
|
'''
|
||||||
|
|
@@ -67,7 +67,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
||||||
data["posters"] = [result]
|
data["posters"] = [result]
|
||||||
else:
|
else:
|
||||||
html_ = read_url(result, unicode=True)
|
html_ = read_url(result, unicode=True)
|
||||||
result = find_re(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
|
result = find_re(html_, '//www.criterion.com/films/%s.*?">(.*?)</a>' % id)
|
||||||
result = find_re(result, "src=\"(.*?)\"")
|
result = find_re(result, "src=\"(.*?)\"")
|
||||||
if result:
|
if result:
|
||||||
data["posters"] = [result.replace("_w100", "")]
|
data["posters"] = [result.replace("_w100", "")]
|
||||||
|
|
@@ -102,7 +102,7 @@ def get_ids(page=None):
|
||||||
ids += results
|
ids += results
|
||||||
results = re.compile("boxsets/(.*?)\"").findall(html)
|
results = re.compile("boxsets/(.*?)\"").findall(html)
|
||||||
for result in results:
|
for result in results:
|
||||||
html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True)
|
html = read_url("https://www.criterion.com/boxsets/" + result, unicode=True)
|
||||||
results = re.compile("films/(\d+)-").findall(html)
|
results = re.compile("films/(\d+)-").findall(html)
|
||||||
ids += results
|
ids += results
|
||||||
return sorted(set(ids), key=int)
|
return sorted(set(ids), key=int)
|
||||||
|
|
|
||||||
|
|
@@ -512,14 +512,23 @@ class Imdb(SiteParser):
|
||||||
credits = [c for c in credits if c[1]]
|
credits = [c for c in credits if c[1]]
|
||||||
|
|
||||||
self['credits'] = []
|
self['credits'] = []
|
||||||
|
self['lyricist'] = []
|
||||||
|
self['singer'] = []
|
||||||
for department, crew in credits:
|
for department, crew in credits:
|
||||||
department = department.replace('(in alphabetical order)', '').strip()
|
department = department.replace('(in alphabetical order)', '').strip()
|
||||||
for c in crew:
|
for c in crew:
|
||||||
|
name = c[0]
|
||||||
|
roles = c[1]
|
||||||
self['credits'].append({
|
self['credits'].append({
|
||||||
'name': c[0],
|
'name': name,
|
||||||
'roles': c[1],
|
'roles': roles,
|
||||||
'deparment': department
|
'deparment': department
|
||||||
})
|
})
|
||||||
|
if department == 'Music Department':
|
||||||
|
if 'lyricist' in roles:
|
||||||
|
self['lyricist'].append(name)
|
||||||
|
if 'playback singer' in roles:
|
||||||
|
self['singer'].append(name)
|
||||||
if not self['credits']:
|
if not self['credits']:
|
||||||
del self['credits']
|
del self['credits']
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue