From 639b74eac39d425e4a9bca219a48478839d4991d Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Wed, 10 Oct 2012 13:27:19 +0200 Subject: [PATCH] reduce choice of alternative titles for english films, fixes #1084 --- ox/web/imdb.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/ox/web/imdb.py b/ox/web/imdb.py index 256f92e..3a6ab71 100644 --- a/ox/web/imdb.py +++ b/ox/web/imdb.py @@ -307,23 +307,28 @@ class Imdb(SiteParser): break if not stop_word and not type in types: types[type] = t[0] - for regexp in ( + regexps = [ "^.+ \(imdb display title\) \(English title\)$", "^International \(English title\)$", - "^.+ \(English title\)$", "^International \(.+\) \(English title\)$", "^.+ \(.+\) \(English title\)$", "^USA$", "^UK$", "^USA \(imdb display title\)$", "^UK \(imdb display title\)$", - "^USA \(.+\)$", - "^UK \(.+\)$", - "^Australia \(.+\)$", - "^International \(.+ title\)$", - ): + ] + if not filter(lambda c: c in ('USA', 'UK', 'Australia'), self.get('country', [])): + regexps.insert(2, "^.+ \(English title\)$") + regexps += [ + "^USA \(.+\)$", + "^UK \(.+\)$", + "^Australia \(.+\)$", + "^International \(.+ title\)$", + ] + for regexp in regexps: for type in types: if re.compile(regexp).findall(type): + #print types[type], type self['internationalTitle'] = types[type] break if 'internationalTitle' in self: