diff --git a/ox/web/criterion.py b/ox/web/criterion.py index d687b9b..6cef01e 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -14,7 +14,7 @@ def get_id(url): return url.split("/")[-1] def get_url(id): - return "http://www.criterion.com/films/%s" % id + return "https://www.criterion.com/films/%s" % id def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): ''' @@ -67,7 +67,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): data["posters"] = [result] else: html_ = read_url(result, unicode=True) - result = find_re(html_, '(.*?)' % id) + result = find_re(html_, '//www.criterion.com/films/%s.*?">(.*?)' % id) result = find_re(result, "src=\"(.*?)\"") if result: data["posters"] = [result.replace("_w100", "")] @@ -102,7 +102,7 @@ def get_ids(page=None): ids += results results = re.compile("boxsets/(.*?)\"").findall(html) for result in results: - html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True) + html = read_url("https://www.criterion.com/boxsets/" + result, unicode=True) results = re.compile("films/(\d+)-").findall(html) ids += results return sorted(set(ids), key=int) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index b28bf2b..57b2423 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -512,14 +512,23 @@ class Imdb(SiteParser): credits = [c for c in credits if c[1]] self['credits'] = [] + self['lyricist'] = [] + self['singer'] = [] for department, crew in credits: department = department.replace('(in alphabetical order)', '').strip() for c in crew: + name = c[0] + roles = c[1] self['credits'].append({ - 'name': c[0], - 'roles': c[1], + 'name': name, + 'roles': roles, 'deparment': department }) + if department == 'Music Department': + if 'lyricist' in roles: + self['lyricist'].append(name) + if 'playback singer' in roles: + self['singer'].append(name) if not self['credits']: del self['credits']