This commit is contained in:
parent
c69a2ee825
commit
da72fbdaed
2 changed files with 38 additions and 24 deletions
|
@ -208,7 +208,6 @@ def get_sort_name(name):
|
||||||
|
|
||||||
>>> get_sort_name('Scorsese, Martin')
|
>>> get_sort_name('Scorsese, Martin')
|
||||||
'Scorsese, Martin'
|
'Scorsese, Martin'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
if not ' ' in name or ', ' in name:
|
if not ' ' in name or ', ' in name:
|
||||||
return name
|
return name
|
||||||
|
|
|
@ -232,7 +232,7 @@ class Imdb(SiteParser):
|
||||||
're': '<span class="tv-extra">(TV series)</span>',
|
're': '<span class="tv-extra">(TV series)</span>',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
},
|
},
|
||||||
'originalTitle': {
|
'title': {
|
||||||
'page': 'combined',
|
'page': 'combined',
|
||||||
're': '<h1>(.*?) <span>',
|
're': '<h1>(.*?) <span>',
|
||||||
'type': 'string'
|
'type': 'string'
|
||||||
|
@ -283,23 +283,28 @@ class Imdb(SiteParser):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
super(Imdb, self).__init__(0)
|
super(Imdb, self).__init__(0)
|
||||||
|
|
||||||
#only list one country per alternative title
|
for t in self.get('alternativeTitles', []):
|
||||||
|
for type in t[1].lower().split('/'):
|
||||||
def is_international_title(t):
|
type = type.strip()
|
||||||
if 'script title' in t[1].lower(): return False
|
for regexp in (
|
||||||
if 'recut version' in t[1].lower(): return False
|
"^.+ \(imdb display title\) \(English title\)$",
|
||||||
if 'working title' in t[1].lower(): return False
|
"^International \(English title\)$",
|
||||||
if 'complete title' in t[1].lower(): return False
|
"^.+ \(English title\)$",
|
||||||
if 'usa (imdb display title)' in t[1].lower(): return True
|
"^International \(.+\) \(English title\)$",
|
||||||
if t[1].lower() == 'usa': return True
|
"^.+ \(.+\) \(English title\)$",
|
||||||
if 'international (english title)' in t[1].lower(): return True
|
"^USA$",
|
||||||
#fails if original is english... Japan (English title)
|
"^UK$",
|
||||||
#if 'english title' in t[1].lower(): return True
|
"^USA \(.+\)$",
|
||||||
return False
|
"^UK \(.+\)$",
|
||||||
ititle = filter(is_international_title, self.get('alternativeTitles', []))
|
"^International \(.+ title\)$",
|
||||||
if ititle:
|
):
|
||||||
self['englishTitle'] = ititle[0][0]
|
if re.compile(regexp).findall(type):
|
||||||
self['title'] = self.get('englishTitle', self['originalTitle'])
|
self['internationalTitle'] = t[0]
|
||||||
|
break
|
||||||
|
if 'internationalTitle' in self:
|
||||||
|
break
|
||||||
|
if 'internationalTitle' in self:
|
||||||
|
break
|
||||||
|
|
||||||
def cleanup_title(title):
|
def cleanup_title(title):
|
||||||
if title.startswith('"') and title.endswith('"'):
|
if title.startswith('"') and title.endswith('"'):
|
||||||
|
@ -307,17 +312,27 @@ class Imdb(SiteParser):
|
||||||
title = re.sub('\(\#[.\d]+\)', '', title)
|
title = re.sub('\(\#[.\d]+\)', '', title)
|
||||||
return title.strip()
|
return title.strip()
|
||||||
|
|
||||||
for t in ('title', 'englishTitle', 'originalTitle'):
|
for t in ('title', 'internationalTitle'):
|
||||||
if t in self:
|
if t in self:
|
||||||
self[t] = cleanup_title(self[t])
|
self[t] = cleanup_title(self[t])
|
||||||
if 'alternativeTitles' in self:
|
if 'alternativeTitles' in self:
|
||||||
if len(self['alternativeTitles']) == 2 and \
|
if len(self['alternativeTitles']) == 2 and \
|
||||||
isinstance(self['alternativeTitles'][0], basestring):
|
isinstance(self['alternativeTitles'][0], basestring):
|
||||||
self['alternativeTitles'] = [self['alternativeTitles']]
|
self['alternativeTitles'] = [self['alternativeTitles']]
|
||||||
self['alternativeTitles'] = [[cleanup_title(t[0]),
|
alt = {}
|
||||||
t[1].split(' / ')[0].split('(')[0].strip()]
|
for t in self['alternativeTitles']:
|
||||||
for t in self['alternativeTitles']]
|
title = cleanup_title(t[0])
|
||||||
#self[t] = re.sub('\(\#[.\d]+\)', '', self[t])
|
if title not in (self.get('title'), self.get('internationalTitle')):
|
||||||
|
if title not in alt:
|
||||||
|
alt[title] = []
|
||||||
|
for c in t[1].split('/'):
|
||||||
|
c = c.replace('International', '').split('(')[0].strip()
|
||||||
|
if c:
|
||||||
|
alt[title].append(c)
|
||||||
|
self['alternativeTitles'] = []
|
||||||
|
for t in sorted(alt, lambda a, b: cmp(sorted(alt[a]), sorted(alt[b]))):
|
||||||
|
if alt[t]:
|
||||||
|
self['alternativeTitles'].append((t, sorted(alt[t])))
|
||||||
|
|
||||||
if 'runtime' in self and self['runtime']:
|
if 'runtime' in self and self['runtime']:
|
||||||
if 'min' in self['runtime']: base=60
|
if 'min' in self['runtime']: base=60
|
||||||
|
|
Loading…
Reference in a new issue