From e0dd4d53b1ff2c0961af86ad820d36b507aa980e Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sat, 29 Sep 2012 18:13:58 +0200
Subject: [PATCH] fix only one connection, parse connection description too
---
ox/web/imdb.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/ox/web/imdb.py b/ox/web/imdb.py
index 382b7a4..8c25634 100644
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@@ -381,7 +381,7 @@ class Imdb(SiteParser):
if 'connections' in self:
cc={}
- if len(self['connections']) == 2 and isinstance(self['connections'][0], basestring):
+ if len(self['connections']) == 3 and isinstance(self['connections'][0], basestring):
self['connections'] = [self['connections']]
for rel, data, _ in self['connections']:
#cc[unicode(rel)] = re.compile('(.*?)').findall(data)
@@ -389,11 +389,15 @@ class Imdb(SiteParser):
title = c[1]
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
- return {
+ r = {
'id': c[0],
- 'title': title
+ 'title': title,
}
- cc[unicode(rel)] = map(get_conn, re.compile('(.*?)').findall(data))
+ description = c[2].split('<br />')
+ if len(description) == 2:
+ r['description'] = description[-1].strip()
+ return r
+ cc[unicode(rel)] = map(get_conn, re.compile('(.*?)(.*?)<\/div', re.DOTALL).findall(data))
self['connections'] = cc