diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 89a326e..2717f84 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -41,7 +41,6 @@ class Imdb(SiteParser):
"td>(.*?).*?
(.*?) | "
],
'type': 'list'
-
},
'aspectratio': {
'page': 'combined',
@@ -304,7 +303,16 @@ class Imdb(SiteParser):
'page': 'combined',
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
'type': 'int'
- }
+ },
+ 'credits': {
+ 'page': 'fullcredits',
+ 're': [
+ lambda data: data.split('(.*?)
.*?()',
+ lambda data: [d for d in data if d]
+ ],
+ 'type': 'list'
+ },
}
def read_url(self, url, timeout):
@@ -620,6 +628,32 @@ class Imdb(SiteParser):
self['summary'] = self['summary'][0]
self['summary'] = self['summary'].split('(.*?).*?(.*?) | .*?(.*?) | ', re.DOTALL).findall(d[1])
+ ]
+ ] for d in self['credits'] if d
+ ]
+ credits = [c for c in credits if c[1]]
+
+ self['credits'] = []
+ for department, crew in credits:
+ department = department.replace('(in alphabetical order)', '').strip()
+ for c in crew:
+ self['credits'].append({
+ 'name': c[0],
+ 'roles': c[1],
+ 'deparment': department
+ })
+
class ImdbCombined(Imdb):
def __init__(self, id, timeout=-1):
_regex = {}