some more special cases

2012-11-11 17:15:40 +01:00 · 2012-11-11 17:15:40 +01:00 · 6529e5f1c1
commit 6529e5f1c1
parent 43a54740bd
1 changed files with 29 additions and 14 deletions
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@ -289,23 +289,33 @@ class Imdb(SiteParser):
               self['alternativeTitles'] = [self['alternativeTitles']]
        types = {}
        stop_words = [ 
            'alternative spelling',
            'alternative title',
            'alternative transliteration',
            'complete title',
            'IMAX version',
            'informal short title',
            'longer version',
            'new title',
            'recut version',
            'reissue title',
            'restored version',
            'script title',
            'TV title',
            'working title',
        ]
        # ignore titles tagged "Japan (English title)"
        # for movies whose country list is not exactly ['Japan']
        if ['Japan'] != self.get('country', []):
            stop_words += [
                'Japan (English title)'
            ]
        for t in self.get('alternativeTitles', []):
            for type in t[1].split('/'):
                type = type.strip()
                stop_word = False
-                for key in (
+                for key in stop_words:
                    'alternative title',
                    'complete title',
                    'recut version',
                    'script title',
                    'working title',
                    'reissue title',
                    'IMAX version',
                    'alternative spelling',
                    'informal short title',
                    'alternative transliteration',
                    'restored version'
                ):
                    if key in type:
                        stop_word = True
                        break
@ -318,7 +328,12 @@ class Imdb(SiteParser):
            "^International \(English title\)$",
            "^International \(.+\) \(English title\)$",
        ]
-        if not filter(lambda c: c in ('USA', 'UK', 'Australia', 'New Zealand'), self.get('country', [])):
+        if 'Hong Kong' in self.get('country', []):
            regexps += [
                "Hong Kong \(English title\)"
            ]
        english_countries = ('USA', 'UK', 'Australia', 'New Zealand')
        if not filter(lambda c: c in english_countries, self.get('country', [])):
            regexps += [
                "^[^(]+ \(English title\)$",
                "^.+ \(.+\) \(English title\)$",