This commit is contained in:
j 2012-08-21 08:41:49 +02:00
parent 61b14e9537
commit 03dbc4d045
2 changed files with 9 additions and 9 deletions

View file

@ -63,19 +63,19 @@ def parse_entry(html, title):
def parse_list(html, title):
    """Return the tag-stripped entries of the definition list named *title*.

    The ``<dd>`` body following ``<dt>title</dt>`` is located (the title is
    lower-cased and interpolated into the regular expression unescaped).
    Every ``<li>...</li>`` inside that body is passed through ``strip_tags``.
    If the body contains no ``<li>`` items but is itself non-empty, the whole
    body is stripped and returned as a single-element list.

    Returns a list of strings.
    """
    # NOTE(review): presumably find_re yields an empty string when nothing
    # matches, which makes the result an empty list -- confirm against find_re.
    html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
    # Build a real list exactly once: a lazy ``map`` object is always truthy,
    # which would silently disable the fallback below.
    r = [
        strip_tags(item)
        for item in re.compile('<li>(.*?)</li>', re.DOTALL).findall(html)
    ]
    if not r and html:
        # No <li> markup: treat the whole <dd> body as the only entry.
        r = [strip_tags(html)]
    return r
def parse_table(html):
    """Return the rows of the ``results-table`` block as lists of cell text.

    The markup between ``<div id="results-table">`` and ``</table>`` is split
    into rows on ``</tr>``; the chunk after the final ``</tr>`` is discarded.
    Each row is split on the literal ``<td width="305">-`` separator, and
    every resulting piece is tag-stripped, whitespace-trimmed and cleared of
    ``&nbsp;`` entities.

    Returns a list of lists of strings (one inner list per row).
    """
    table = find_re(html, '<div id="results-table">(.*?)</table>')
    # Single return that materialises real lists, so the result can be
    # indexed, measured and iterated more than once.
    return [
        [
            strip_tags(r).strip().replace('&nbsp;', '')
            for r in x.split('<td width="305">-')
        ]
        for x in table.split('</tr>')[:-1]
    ]
def parse_text(html, title):
    """Return the trimmed, tag-free text of the cell that follows *title*.

    *title* is interpolated into the regular expression as-is (not escaped),
    directly before ``</td>``; the captured group is the content of the next
    ``<td colspan="2"><p>`` cell up to its closing ``</td>``.
    """
    pattern = '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title
    cell = find_re(html, pattern)
    text = strip_tags(cell)
    return text.strip()

View file

@ -323,7 +323,7 @@ class Imdb(SiteParser):
if isinstance(self['cast'][0], basestring):
self['cast'] = [self['cast']]
self['actor'] = [c[0] for c in self['cast']]
self['cast'] = map(lambda x: {'actor': x[0], 'character': x[1]}, self['cast'])
self['cast'] = [{'actor': x[0], 'character': x[1]} for x in self['cast']]
if 'connections' in self:
cc={}