From 6df54b5ca2f70ac85667ac808c3cb8f5c7bcb134 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sat, 22 Sep 2012 22:52:18 +0200 Subject: [PATCH] add title stop words again --- ox/web/imdb.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index eea4ed1..fbd0441 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -291,6 +291,14 @@ class Imdb(SiteParser): for t in self.get('alternativeTitles', []): for type in t[1].split('/'): type = type.strip() + for key in ( + 'complete title', + 'recut version', + 'script title', + 'working title', + ): + if key in type: + continue for regexp in ( "^.+ \(imdb display title\) \(English title\)$", "^International \(English title\)$", @@ -325,7 +333,7 @@ class Imdb(SiteParser): self[t] = cleanup_title(self[t]) if 'internationalTitle' in self and \ - self.get('title') == self['internationalTitle']: + self.get('title', '').lower() == self['internationalTitle'].lower(): del self['internationalTitle'] if 'alternativeTitles' in self: