merge
This commit is contained in:
commit
87f491e96e
3 changed files with 39 additions and 13 deletions
|
@ -137,6 +137,8 @@ def decodeHtml(html):
|
||||||
"""
|
"""
|
||||||
>>> decodeHtml('me & you and $&%')
|
>>> decodeHtml('me & you and $&%')
|
||||||
u'me & you and $&%'
|
u'me & you and $&%'
|
||||||
|
>>> decodeHtml('€')
|
||||||
|
u'€'
|
||||||
"""
|
"""
|
||||||
if type(html) != unicode:
|
if type(html) != unicode:
|
||||||
html = unicode(html)[:]
|
html = unicode(html)[:]
|
||||||
|
@ -146,7 +148,9 @@ def decodeHtml(html):
|
||||||
uchr = lambda value: value > 255 and unichr(value) or chr(value)
|
uchr = lambda value: value > 255 and unichr(value) or chr(value)
|
||||||
def entitydecode(match, uchr=uchr):
|
def entitydecode(match, uchr=uchr):
|
||||||
entity = match.group(1)
|
entity = match.group(1)
|
||||||
if entity.startswith('#x'):
|
if entity == '#x80':
|
||||||
|
return u'€'
|
||||||
|
elif entity.startswith('#x'):
|
||||||
return uchr(int(entity[2:], 16))
|
return uchr(int(entity[2:], 16))
|
||||||
elif entity.startswith('#'):
|
elif entity.startswith('#'):
|
||||||
return uchr(int(entity[1:]))
|
return uchr(int(entity[1:]))
|
||||||
|
|
|
@ -61,7 +61,7 @@ def parse_movie_path(path):
|
||||||
director = []
|
director = []
|
||||||
|
|
||||||
#extension/language
|
#extension/language
|
||||||
fileparts = parts[-1].split('.')
|
fileparts = [x.replace('||', '. ') for x in parts[-1].replace('. ', '||').split('.')]
|
||||||
extension = fileparts[-1]
|
extension = fileparts[-1]
|
||||||
|
|
||||||
if len(fileparts[-2]) == 2:
|
if len(fileparts[-2]) == 2:
|
||||||
|
@ -152,7 +152,7 @@ def get_oxid(title, director=[], year='',
|
||||||
return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()
|
return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()
|
||||||
director = ', '.join(director)
|
director = ', '.join(director)
|
||||||
episode_director = ', '.join(episode_director)
|
episode_director = ', '.join(episode_director)
|
||||||
if not episode:
|
if not episode and not episode_title:
|
||||||
oxid = get_hash(director)[:8] + get_hash('\n'.join([title, str(year)]))[:8]
|
oxid = get_hash(director)[:8] + get_hash('\n'.join([title, str(year)]))[:8]
|
||||||
else:
|
else:
|
||||||
oxid = get_hash('\n'.join([director, title, str(year), str(season)]))[:8] + \
|
oxid = get_hash('\n'.join([director, title, str(year), str(season)]))[:8] + \
|
||||||
|
|
|
@ -88,7 +88,7 @@ class Imdb(SiteParser):
|
||||||
'creator': {
|
'creator': {
|
||||||
'page': 'combined',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
'<h5>Creators:</h5>.*?<div class="info-content">(.*?)</div>',
|
'<h5>Creator.?:</h5>.*?<div class="info-content">(.*?)</div>',
|
||||||
'<a href="/name/.*?>(.*?)</a>'
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
|
@ -102,6 +102,14 @@ class Imdb(SiteParser):
|
||||||
],
|
],
|
||||||
'type': 'list'
|
'type': 'list'
|
||||||
},
|
},
|
||||||
|
'_director': {
|
||||||
|
'page': 'combined',
|
||||||
|
're': [
|
||||||
|
'<h5>Director:</h5>.*?<div class="info-content">(.*?)</div>',
|
||||||
|
'<a href="/name/.*?>(.*?)</a>'
|
||||||
|
],
|
||||||
|
'type': 'list'
|
||||||
|
},
|
||||||
'editor': {
|
'editor': {
|
||||||
'page': 'combined',
|
'page': 'combined',
|
||||||
're': [
|
're': [
|
||||||
|
@ -266,11 +274,12 @@ class Imdb(SiteParser):
|
||||||
#only list one country per alternative title
|
#only list one country per alternative title
|
||||||
|
|
||||||
def is_international_title(t):
|
def is_international_title(t):
|
||||||
|
if 'script title' in t[1].lower(): return False
|
||||||
if 'recut version' in t[1].lower(): return False
|
if 'recut version' in t[1].lower(): return False
|
||||||
if 'working title' in t[1].lower(): return False
|
if 'working title' in t[1].lower(): return False
|
||||||
if 'complete title' in t[1].lower(): return False
|
if 'complete title' in t[1].lower(): return False
|
||||||
if t[1].lower() == 'usa': return True
|
if t[1].lower() == 'usa': return True
|
||||||
if 'international' in t[1].lower(): return True
|
if 'international (english title)' in t[1].lower(): return True
|
||||||
#fails if orignial is english... Japan (English title)
|
#fails if orignial is english... Japan (English title)
|
||||||
#if 'english title' in t[1].lower(): return True
|
#if 'english title' in t[1].lower(): return True
|
||||||
return False
|
return False
|
||||||
|
@ -328,10 +337,10 @@ class Imdb(SiteParser):
|
||||||
for key in ('country', 'genre'):
|
for key in ('country', 'genre'):
|
||||||
if key in self:
|
if key in self:
|
||||||
self[key] = filter(lambda x: x.lower() != 'home', self[key])
|
self[key] = filter(lambda x: x.lower() != 'home', self[key])
|
||||||
|
#0092999
|
||||||
|
if '_director' in self:
|
||||||
|
self['creator'] = self.pop('_director')
|
||||||
|
|
||||||
if 'creator' in self:
|
|
||||||
self['episodeDirector'] = self['director']
|
|
||||||
self['director'] = self['creator']
|
|
||||||
if 'series' in self:
|
if 'series' in self:
|
||||||
if 'episodeTitle' in self:
|
if 'episodeTitle' in self:
|
||||||
self['seriesTitle'] = self['title']
|
self['seriesTitle'] = self['title']
|
||||||
|
@ -340,18 +349,29 @@ class Imdb(SiteParser):
|
||||||
self['title'] = "%s (S%02dE%02d) %s" % (
|
self['title'] = "%s (S%02dE%02d) %s" % (
|
||||||
self['seriesTitle'], self['season'], self['episode'], self['episodeTitle'])
|
self['seriesTitle'], self['season'], self['episode'], self['episodeTitle'])
|
||||||
for key in ('Director', 'Year'):
|
for key in ('Director', 'Year'):
|
||||||
if key in self:
|
if key.lower() in self:
|
||||||
self['episode%s'%key] = self[key.lowe()]
|
self['episode%s'%key] = self[key.lower()]
|
||||||
series = Imdb(self['series'])
|
series = Imdb(self['series'])
|
||||||
for key in ['director', 'year']:
|
|
||||||
|
if not 'creator' in series and 'director' in series:
|
||||||
|
series['creator'] = series['director']
|
||||||
|
if len(series['creator']) > 10:
|
||||||
|
series['creator'] = series['director'][:1]
|
||||||
|
|
||||||
|
for key in ['creator', 'year', 'country']:
|
||||||
if key in series:
|
if key in series:
|
||||||
self[key] = series[key]
|
self[key] = series[key]
|
||||||
|
|
||||||
if 'originalTitle' in self:
|
if 'originalTitle' in self:
|
||||||
del self['originalTitle']
|
del self['originalTitle']
|
||||||
else:
|
else:
|
||||||
for key in ('seriesTitle', 'episodeTitle', 'season', 'episode'):
|
for key in ('seriesTitle', 'episodeTitle', 'season', 'episode'):
|
||||||
if key in self:
|
if key in self:
|
||||||
del self[key]
|
del self[key]
|
||||||
|
if 'creator' in self:
|
||||||
|
if 'director' in self:
|
||||||
|
self['episodeDirector'] = self['director']
|
||||||
|
self['director'] = self['creator']
|
||||||
|
|
||||||
if 'budget' in self and 'gross' in self:
|
if 'budget' in self and 'gross' in self:
|
||||||
self['profit'] = self['gross'] - self['budget']
|
self['profit'] = self['gross'] - self['budget']
|
||||||
|
@ -359,6 +379,8 @@ class Imdb(SiteParser):
|
||||||
if 'releaseDate' in self:
|
if 'releaseDate' in self:
|
||||||
if isinstance(self['releaseDate'], list):
|
if isinstance(self['releaseDate'], list):
|
||||||
self['releaseDate'] = min(self['releaseDate'])
|
self['releaseDate'] = min(self['releaseDate'])
|
||||||
|
if 'summary' in self:
|
||||||
|
self['summary'] = self['summary'].split('</p')[0].strip()
|
||||||
|
|
||||||
class ImdbCombined(Imdb):
|
class ImdbCombined(Imdb):
|
||||||
def __init__(self, id, timeout=-1):
|
def __init__(self, id, timeout=-1):
|
||||||
|
@ -510,8 +532,8 @@ def getMoviePoster(imdbId):
|
||||||
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
|
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
|
||||||
'''
|
'''
|
||||||
info = ImdbCombined(imdbId)
|
info = ImdbCombined(imdbId)
|
||||||
if 'poster_id' in info:
|
if 'posterId' in info:
|
||||||
url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['poster_id'], imdbId)
|
url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId)
|
||||||
data = readUrl(url)
|
data = readUrl(url)
|
||||||
poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
|
poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
|
||||||
return poster
|
return poster
|
||||||
|
|
Loading…
Reference in a new issue