parse fullcredits
This commit is contained in:
parent
51af80545f
commit
67d30ef88e
1 changed file with 36 additions and 2 deletions
|
@ -41,7 +41,6 @@ class Imdb(SiteParser):
|
|||
"td>(.*?)</td>.*?<td>(.*?)</td>"
|
||||
],
|
||||
'type': 'list'
|
||||
|
||||
},
|
||||
'aspectratio': {
|
||||
'page': 'combined',
|
||||
|
@ -304,7 +303,16 @@ class Imdb(SiteParser):
|
|||
'page': 'combined',
|
||||
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
|
||||
'type': 'int'
|
||||
}
|
||||
},
|
||||
'credits': {
|
||||
'page': 'fullcredits',
|
||||
're': [
|
||||
lambda data: data.split('<h4'),
|
||||
'>(.*?)</h4>.*?(<table.*?</table>)',
|
||||
lambda data: [d for d in data if d]
|
||||
],
|
||||
'type': 'list'
|
||||
},
|
||||
}
|
||||
|
||||
def read_url(self, url, timeout):
|
||||
|
@ -620,6 +628,32 @@ class Imdb(SiteParser):
|
|||
self['summary'] = self['summary'][0]
|
||||
self['summary'] = self['summary'].split('</p')[0].strip()
|
||||
|
||||
if 'credits' in self:
|
||||
credits = [
|
||||
[
|
||||
strip_tags(d[0].replace(' by', '')).strip(),
|
||||
[
|
||||
[
|
||||
strip_tags(x[0]).strip(),
|
||||
[t.strip().split(' (')[0].strip() for t in x[2].split(' / ')]
|
||||
]
|
||||
for x in
|
||||
re.compile('<td class="name">(.*?)</td>.*?<td>(.*?)</td>.*?<td class="credit">(.*?)</td>', re.DOTALL).findall(d[1])
|
||||
]
|
||||
] for d in self['credits'] if d
|
||||
]
|
||||
credits = [c for c in credits if c[1]]
|
||||
|
||||
self['credits'] = []
|
||||
for department, crew in credits:
|
||||
department = department.replace('(in alphabetical order)', '').strip()
|
||||
for c in crew:
|
||||
self['credits'].append({
|
||||
'name': c[0],
|
||||
'roles': c[1],
|
||||
'deparment': department
|
||||
})
|
||||
|
||||
class ImdbCombined(Imdb):
|
||||
def __init__(self, id, timeout=-1):
|
||||
_regex = {}
|
||||
|
|
Loading…
Reference in a new issue