parse fullcredits
This commit is contained in:
parent
51af80545f
commit
67d30ef88e
1 changed file with 36 additions and 2 deletions
|
@ -41,7 +41,6 @@ class Imdb(SiteParser):
|
||||||
"td>(.*?)</td>.*?<td>(.*?)</td>"
|
"td>(.*?)</td>.*?<td>(.*?)</td>"
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
|
|
||||||
},
|
},
|
||||||
'aspectratio': {
|
'aspectratio': {
|
||||||
'page': 'combined',
|
'page': 'combined',
|
||||||
|
@ -304,7 +303,16 @@ class Imdb(SiteParser):
|
||||||
'page': 'combined',
|
'page': 'combined',
|
||||||
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
|
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
|
||||||
'type': 'int'
|
'type': 'int'
|
||||||
}
|
},
|
||||||
|
'credits': {
|
||||||
|
'page': 'fullcredits',
|
||||||
|
're': [
|
||||||
|
lambda data: data.split('<h4'),
|
||||||
|
'>(.*?)</h4>.*?(<table.*?</table>)',
|
||||||
|
lambda data: [d for d in data if d]
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def read_url(self, url, timeout):
|
def read_url(self, url, timeout):
|
||||||
|
@ -620,6 +628,32 @@ class Imdb(SiteParser):
|
||||||
self['summary'] = self['summary'][0]
|
self['summary'] = self['summary'][0]
|
||||||
self['summary'] = self['summary'].split('</p')[0].strip()
|
self['summary'] = self['summary'].split('</p')[0].strip()
|
||||||
|
|
||||||
|
if 'credits' in self:
|
||||||
|
credits = [
|
||||||
|
[
|
||||||
|
strip_tags(d[0].replace(' by', '')).strip(),
|
||||||
|
[
|
||||||
|
[
|
||||||
|
strip_tags(x[0]).strip(),
|
||||||
|
[t.strip().split(' (')[0].strip() for t in x[2].split(' / ')]
|
||||||
|
]
|
||||||
|
for x in
|
||||||
|
re.compile('<td class="name">(.*?)</td>.*?<td>(.*?)</td>.*?<td class="credit">(.*?)</td>', re.DOTALL).findall(d[1])
|
||||||
|
]
|
||||||
|
] for d in self['credits'] if d
|
||||||
|
]
|
||||||
|
credits = [c for c in credits if c[1]]
|
||||||
|
|
||||||
|
self['credits'] = []
|
||||||
|
for department, crew in credits:
|
||||||
|
department = department.replace('(in alphabetical order)', '').strip()
|
||||||
|
for c in crew:
|
||||||
|
self['credits'].append({
|
||||||
|
'name': c[0],
|
||||||
|
'roles': c[1],
|
||||||
|
'deparment': department
|
||||||
|
})
|
||||||
|
|
||||||
class ImdbCombined(Imdb):
|
class ImdbCombined(Imdb):
|
||||||
def __init__(self, id, timeout=-1):
|
def __init__(self, id, timeout=-1):
|
||||||
_regex = {}
|
_regex = {}
|
||||||
|
|
Loading…
Reference in a new issue