This commit is contained in:
rolux 2011-10-18 19:45:12 +02:00
commit 87f491e96e
3 changed files with 39 additions and 13 deletions

View file

@ -137,6 +137,8 @@ def decodeHtml(html):
"""
>>> decodeHtml('me & you and $&%')
u'me & you and $&%'
>>> decodeHtml('&#x80;')
u''
"""
if type(html) != unicode:
html = unicode(html)[:]
@ -146,7 +148,9 @@ def decodeHtml(html):
uchr = lambda value: value > 255 and unichr(value) or chr(value)
def entitydecode(match, uchr=uchr):
entity = match.group(1)
if entity.startswith('#x'):
if entity == '#x80':
return u''
elif entity.startswith('#x'):
return uchr(int(entity[2:], 16))
elif entity.startswith('#'):
return uchr(int(entity[1:]))

View file

@ -61,7 +61,7 @@ def parse_movie_path(path):
director = []
#extension/language
fileparts = parts[-1].split('.')
fileparts = [x.replace('||', '. ') for x in parts[-1].replace('. ', '||').split('.')]
extension = fileparts[-1]
if len(fileparts[-2]) == 2:
@ -152,7 +152,7 @@ def get_oxid(title, director=[], year='',
return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()
director = ', '.join(director)
episode_director = ', '.join(episode_director)
if not episode:
if not episode and not episode_title:
oxid = get_hash(director)[:8] + get_hash('\n'.join([title, str(year)]))[:8]
else:
oxid = get_hash('\n'.join([director, title, str(year), str(season)]))[:8] + \

View file

@ -88,7 +88,7 @@ class Imdb(SiteParser):
'creator': {
'page': 'combined',
're': [
'<h5>Creators:</h5>.*?<div class="info-content">(.*?)</div>',
'<h5>Creator.?:</h5>.*?<div class="info-content">(.*?)</div>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
@ -102,6 +102,14 @@ class Imdb(SiteParser):
],
'type': 'list'
},
'_director': {
'page': 'combined',
're': [
'<h5>Director:</h5>.*?<div class="info-content">(.*?)</div>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'editor': {
'page': 'combined',
're': [
@ -266,11 +274,12 @@ class Imdb(SiteParser):
#only list one country per alternative title
def is_international_title(t):
if 'script title' in t[1].lower(): return False
if 'recut version' in t[1].lower(): return False
if 'working title' in t[1].lower(): return False
if 'complete title' in t[1].lower(): return False
if t[1].lower() == 'usa': return True
if 'international' in t[1].lower(): return True
if 'international (english title)' in t[1].lower(): return True
#fails if original is english... Japan (English title)
#if 'english title' in t[1].lower(): return True
return False
@ -328,10 +337,10 @@ class Imdb(SiteParser):
for key in ('country', 'genre'):
if key in self:
self[key] = filter(lambda x: x.lower() != 'home', self[key])
#0092999
if '_director' in self:
self['creator'] = self.pop('_director')
if 'creator' in self:
self['episodeDirector'] = self['director']
self['director'] = self['creator']
if 'series' in self:
if 'episodeTitle' in self:
self['seriesTitle'] = self['title']
@ -340,18 +349,29 @@ class Imdb(SiteParser):
self['title'] = "%s (S%02dE%02d) %s" % (
self['seriesTitle'], self['season'], self['episode'], self['episodeTitle'])
for key in ('Director', 'Year'):
if key in self:
self['episode%s'%key] = self[key.lowe()]
if key.lower() in self:
self['episode%s'%key] = self[key.lower()]
series = Imdb(self['series'])
for key in ['director', 'year']:
if not 'creator' in series and 'director' in series:
series['creator'] = series['director']
if len(series['creator']) > 10:
series['creator'] = series['director'][:1]
for key in ['creator', 'year', 'country']:
if key in series:
self[key] = series[key]
if 'originalTitle' in self:
del self['originalTitle']
else:
for key in ('seriesTitle', 'episodeTitle', 'season', 'episode'):
if key in self:
del self[key]
if 'creator' in self:
if 'director' in self:
self['episodeDirector'] = self['director']
self['director'] = self['creator']
if 'budget' in self and 'gross' in self:
self['profit'] = self['gross'] - self['budget']
@ -359,6 +379,8 @@ class Imdb(SiteParser):
if 'releaseDate' in self:
if isinstance(self['releaseDate'], list):
self['releaseDate'] = min(self['releaseDate'])
if 'summary' in self:
self['summary'] = self['summary'].split('</p')[0].strip()
class ImdbCombined(Imdb):
def __init__(self, id, timeout=-1):
@ -510,8 +532,8 @@ def getMoviePoster(imdbId):
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
'''
info = ImdbCombined(imdbId)
if 'poster_id' in info:
url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['poster_id'], imdbId)
if 'posterId' in info:
url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId)
data = readUrl(url)
poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
return poster