From d21b0b675cea8e32d0c59d8284e5483a98be4c94 Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Mon, 9 Jul 2018 15:20:00 +0200
Subject: [PATCH 1/2] criterion is https now

---
 ox/web/criterion.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ox/web/criterion.py b/ox/web/criterion.py
index d687b9b..6cef01e 100644
--- a/ox/web/criterion.py
+++ b/ox/web/criterion.py
@@ -14,7 +14,7 @@ def get_id(url):
     return url.split("/")[-1]
 
 def get_url(id):
-    return "http://www.criterion.com/films/%s" % id
+    return "https://www.criterion.com/films/%s" % id
 
 def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
     '''
@@ -67,7 +67,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
         data["posters"] = [result]
     else:
         html_ = read_url(result, unicode=True)
-        result = find_re(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
+        result = find_re(html_, '//www.criterion.com/films/%s.*?">(.*?)</a>' % id)
         result = find_re(result, "src=\"(.*?)\"")
         if result:
             data["posters"] = [result.replace("_w100", "")]
@@ -102,7 +102,7 @@ def get_ids(page=None):
     ids += results
     results = re.compile("boxsets/(.*?)\"").findall(html)
     for result in results:
-        html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True)
+        html = read_url("https://www.criterion.com/boxsets/" + result, unicode=True)
         results = re.compile("films/(\d+)-").findall(html)
         ids += results
     return sorted(set(ids), key=int)

From 7041d1b31610a6e9ce37273a83006b376f0f4b2a Mon Sep 17 00:00:00 2001
From: j <j@mailb.org>
Date: Mon, 9 Jul 2018 15:20:13 +0200
Subject: [PATCH 2/2] parse lyricist, singer

---
 ox/web/imdb.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index b28bf2b..57b2423 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -512,14 +512,23 @@ class Imdb(SiteParser):
             credits = [c for c in credits if c[1]]
 
             self['credits'] = []
+            self['lyricist'] = []
+            self['singer'] = []
             for department, crew in credits:
                 department = department.replace('(in alphabetical order)', '').strip()
                 for c in crew:
+                    name = c[0]
+                    roles = c[1]
                     self['credits'].append({
-                        'name': c[0],
-                        'roles': c[1],
+                        'name': name,
+                        'roles': roles,
                         'deparment': department
                     })
+                    if department == 'Music Department':
+                        if 'lyricist' in roles:
+                            self['lyricist'].append(name)
+                        if 'playback singer' in roles:
+                            self['singer'].append(name)
             if not self['credits']:
                 del self['credits']