From e0dd4d53b1ff2c0961af86ad820d36b507aa980e Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 29 Sep 2012 18:13:58 +0200 Subject: [PATCH] fix only one connection, parse connection description too --- ox/web/imdb.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 382b7a4..8c25634 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -381,7 +381,7 @@ class Imdb(SiteParser): if 'connections' in self: cc={} - if len(self['connections']) == 2 and isinstance(self['connections'][0], basestring): + if len(self['connections']) == 3 and isinstance(self['connections'][0], basestring): self['connections'] = [self['connections']] for rel, data, _ in self['connections']: #cc[unicode(rel)] = re.compile('(.*?)').findall(data) @@ -389,11 +389,15 @@ class Imdb(SiteParser): title = c[1] if title.startswith('"') and title.endswith('"'): title = title[1:-1] - return { + r = { 'id': c[0], - 'title': title + 'title': title, } - cc[unicode(rel)] = map(get_conn, re.compile('(.*?)').findall(data)) + description = c[2].split('
') + if len(description) == 2: + r['description'] = description[-1].strip() + return r + cc[unicode(rel)] = map(get_conn, re.compile('(.*?)(.*?)<\/div', re.DOTALL).findall(data)) self['connections'] = cc