Fix Wikipedia parser

This commit is contained in:
j 2013-03-02 05:15:57 +05:30
parent 24410c458a
commit f7186b936c

View file

@@ -53,13 +53,23 @@ def get_movie_data(wikipedia_url):
for row in _box: for row in _box:
d = row.split('=') d = row.split('=')
if len(d) == 2: if len(d) == 2:
key = d[0].strip() _key = d[0].strip()
if key[0] == '|': if _key:
key = key[1:] key = _key
if key[0] == '|':
key = key[1:]
value = d[1].strip() value = d[1].strip()
value = value.replace('<!-- see WP:ALT -->', '')
if '<br>' in value: if '<br>' in value:
value = value.split('<br>') value = value.split('<br>')
filmbox[key.strip()] = value key = key.strip()
if value:
if key in filmbox:
filmbox[key] += value
if isinstance(filmbox[key], list):
filmbox[key] = [k for k in filmbox[key] if k]
else:
filmbox[key] = value
if not filmbox_data: if not filmbox_data:
return filmbox return filmbox
if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit(): if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit():