use geo.normalize_country_name to normalize imdb country names
This commit is contained in:
parent
ad7e21e7a8
commit
adfe642547
1 changed file with 9 additions and 1 deletion
|
@ -16,6 +16,7 @@ from siteparser import SiteParser
|
||||||
import duckduckgo
|
import duckduckgo
|
||||||
|
|
||||||
from ..utils import datetime
|
from ..utils import datetime
|
||||||
|
from ..geo import normalize_country_name
|
||||||
|
|
||||||
def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
|
def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
|
||||||
headers = headers.copy()
|
headers = headers.copy()
|
||||||
|
@ -328,6 +329,10 @@ class Imdb(SiteParser):
|
||||||
isinstance(self['alternativeTitles'][0], basestring):
|
isinstance(self['alternativeTitles'][0], basestring):
|
||||||
self['alternativeTitles'] = [self['alternativeTitles']]
|
self['alternativeTitles'] = [self['alternativeTitles']]
|
||||||
|
|
||||||
|
#normalize country names
|
||||||
|
if 'country' in self:
|
||||||
|
self['country'] = [normalize_country_name(c) or c for c in self['country']]
|
||||||
|
|
||||||
types = {}
|
types = {}
|
||||||
stop_words = [
|
stop_words = [
|
||||||
'alternative spelling',
|
'alternative spelling',
|
||||||
|
@ -378,7 +383,10 @@ class Imdb(SiteParser):
|
||||||
regexps += [
|
regexps += [
|
||||||
"Hong Kong \(English title\)"
|
"Hong Kong \(English title\)"
|
||||||
]
|
]
|
||||||
english_countries = ('USA', 'UK', 'Australia', 'New Zealand')
|
english_countries = (
|
||||||
|
'USA', 'UK', 'United States', 'United Kingdom',
|
||||||
|
'Australia', 'New Zealand'
|
||||||
|
)
|
||||||
if not filter(lambda c: c in english_countries, self.get('country', [])):
|
if not filter(lambda c: c in english_countries, self.get('country', [])):
|
||||||
regexps += [
|
regexps += [
|
||||||
"^[^(]+ \(English title\)$",
|
"^[^(]+ \(English title\)$",
|
||||||
|
|
Loading…
Add table
Reference in a new issue