From 36c6e6908ed8d479f6f80eeba609979ea8619aca Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Fri, 14 Sep 2012 11:17:34 +0200 Subject: [PATCH] cleanup imdb titles, fixes #957 --- ox/web/imdb.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 51c6fbb..05ff05b 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -302,16 +302,19 @@ class Imdb(SiteParser): self['title'] = self.get('englishTitle', self['originalTitle']) for t in ('title', 'englishTitle', 'originalTitle'): - if t in self and self[t].startswith('"') and self[t].endswith('"'): - self[t] = self[t][1:-1] + if t in self: + if self[t].startswith('"') and self[t].endswith('"'): + self[t] = self[t][1:-1] + self[t] = re.sub('\(\#[.\d]+\)', '', self[t]) if 'alternativeTitles' in self: if len(self['alternativeTitles']) == 2 and \ isinstance(self['alternativeTitles'][0], basestring): self['alternativeTitles'] = [self['alternativeTitles']] - self['alternativeTitles'] = [[t[0], + self['alternativeTitles'] = [[re.sub('\(\#[.\d]+\)', '', t[0]), t[1].split(' / ')[0].split('(')[0].strip()] for t in self['alternativeTitles']] + #self[t] = re.sub('\(\#[.\d]+\)', '', self[t]) if 'runtime' in self and self['runtime']: if 'min' in self['runtime']: base=60