2009-06-08 16:08:59 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
#
|
2010-12-22 15:17:38 +00:00
|
|
|
from decimal import Decimal
|
2009-06-08 16:08:59 +00:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import hashlib
|
2010-11-14 18:58:33 +00:00
|
|
|
import unicodedata
|
2009-06-08 16:08:59 +00:00
|
|
|
|
2010-07-07 22:46:41 +00:00
|
|
|
import ox
|
|
|
|
import ox.iso
|
2011-01-01 11:44:42 +00:00
|
|
|
from ox.normalize import normalizeName, normalizeTitle
|
2009-06-08 16:08:59 +00:00
|
|
|
|
2010-08-24 17:16:33 +00:00
|
|
|
|
2010-12-22 15:17:38 +00:00
|
|
|
def parse_decimal(string):
    '''
      convert a number or ratio given as text into a Decimal

      plain numbers ("25") and ratios written with "/" or ":" as
      separator ("30000/1001", "16:9") are accepted; a plain number is
      treated as a ratio with denominator 1. Only the first two
      components of the ratio are used.
    '''
    parts = string.replace(':', '/').split('/')
    if len(parts) == 1:
        # no separator present: divide by one
        parts.append('1')
    numerator, denominator = parts[0], parts[1]
    return Decimal(numerator) / Decimal(denominator)
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2010-08-24 17:16:33 +00:00
|
|
|
def plural_key(term):
    '''
      return the plural form of term

      irregular plurals are looked up in a small table, everything
      else just gets an "s" appended
    '''
    irregular = {'country': 'countries'}
    if term in irregular:
        return irregular[term]
    return term + 's'
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2010-07-12 14:56:14 +00:00
|
|
|
def oxid(title, directors, year='', seriesTitle='', episodeTitle='', season=0, episode=0):
    '''
      build the "0x"-prefixed sha1 based id of an item

      without seriesTitle the id is the full sha1 hex digest of
      title, joined directors and year (one per line).
      with seriesTitle it is the first 20 hex digits of the hash of
      series title and two-digit season, followed by the first 20 hex
      digits of the hash of two-digit episode, episode title, joined
      directors and year.
    '''
    def sha1_hex(lines):
        # hash the utf-8 encoded, newline separated lines
        return hashlib.sha1(u"\n".join(lines).encode('utf-8')).hexdigest()

    director = ', '.join(directors)
    digest = sha1_hex([title, director, year])
    if seriesTitle:
        series_part = sha1_hex([seriesTitle, "%02d" % season])[:20]
        episode_part = sha1_hex(["%02d" % episode, episodeTitle, director, year])[:20]
        digest = series_part + episode_part
    return u"0x" + digest
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2011-01-04 06:50:52 +00:00
|
|
|
def oxdb_id(title, director=(), year='', season='', episode='', episode_title='', episode_director=(), episode_year=''):
    '''
      build the "0x"-prefixed, 16 hex digit oxdb id of an item

      director and episode_director are sequences of names; they are
      joined with ", " before hashing. year, season, episode and
      episode_year are converted with str().

      without episode (empty or 0) the id is made of the first 8 digits
      of the hash of the directors and the first 8 digits of the hash
      of title and year. with episode it is made of the first 8 digits
      of the hash of directors, title, year and season and the first 8
      digits of the hash of episode, episode directors, episode title
      and episode year.
    '''
    # new id function, will replace oxid()
    # (defaults are tuples instead of lists so that no mutable object
    #  is shared between calls)
    def get_hash(string):
        # upper case sha1 hex digest of the utf-8 encoded string
        return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()

    director = ', '.join(director)
    episode_director = ', '.join(episode_director)
    if not episode:
        digest = get_hash(director)[:8] + get_hash('\n'.join([title, str(year)]))[:8]
    else:
        digest = get_hash('\n'.join([director, title, str(year), str(season)]))[:8] + \
            get_hash('\n'.join([str(episode), episode_director, episode_title, str(episode_year)]))[:8]
    return u'0x' + digest
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2010-07-12 14:56:14 +00:00
|
|
|
def oxdb_directors(director):
    '''
      extract the list of directors from a path

      the name of the directory containing the file (i.e. "Last, First"
      in "L/Last, First/Title (YYYY)") is split on "; ", each name is
      passed through normalizeName and cleaned from the placeholders
      "Series", "Unknown Director" and "Various Directors".
      names that end up empty are dropped.

      always returns a list (not a lazy filter object), so the result
      can be iterated more than once.
    '''
    director = os.path.basename(os.path.dirname(director))
    # a trailing "_" in the directory name is turned back into "."
    # NOTE(review): presumably because directory names can not end
    # with a dot - confirm
    if director.endswith('_'):
        director = "%s." % director[:-1]

    def cleanup(name):
        # strip whitespace, then remove placeholder names
        name = name.strip()
        for placeholder in ('Series', 'Unknown Director', 'Various Directors'):
            name = name.replace(placeholder, '')
        return name

    directors = [cleanup(normalizeName(d)) for d in director.split('; ')]
    return [d for d in directors if d]
|
2009-10-04 22:00:08 +00:00
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_title(_title, searchTitle = False):
    '''
      normalize filename to get item title

      only the basename of _title is used and everything after the
      first "." is cut off (dots followed or preceded by a space are
      kept). "_ " after a letter or digit is turned into ": ".

      if the filename contains "Season N.Episode M" the result is
          Title (SxxExx) Episode Title
      or, with searchTitle set,
          "Title" Episode Title

      a "(YYYY)" at the end of the title is removed and the result is
      passed through normalizeTitle.
    '''
    _title = os.path.basename(_title)
    # mask dots that belong to the title so they survive the split on "."
    _title = _title.replace('... ', '_dot_dot_dot_')
    _title = _title.replace('. ', '_dot__space_')
    _title = _title.replace(' .', '_space__dot_')
    parts = _title.split('.')
    title = parts[0]
    title = re.sub(r'([A-Za-z0-9])_ ', r'\1: ', title)
    se = re.findall(r'Season (\d+).Episode (\d+)', _title)
    if se:
        se = "S%02dE%02d" % (int(se[0][0]), int(se[0][1]))
        # the episode title is the component before the extension,
        # or the one before that if the last one is a "Part" marker
        if 'Part' in parts[-2] and 'Episode' not in parts[-3]:
            stitle = parts[-3]
        else:
            stitle = parts[-2]
        # no episode title in filename, component is "Episode N" itself
        if stitle.startswith('Episode '):
            stitle = ''
        if searchTitle:
            title = '"%s" %s' % (title, stitle)
        else:
            title = '%s (%s) %s' % (title, se, stitle)
            # NOTE(review): nesting reconstructed from a listing without
            # indentation - confirm strip() belongs to this branch only
            title = title.strip()
    # restore masked dots
    title = title.replace('_dot_dot_dot_', '... ')
    title = title.replace('_dot__space_', '. ')
    title = title.replace('_space__dot_', ' .')
    # remove year if it is at the end of the title
    year = ox.findRe(title, r'(\(\d{4}\))')
    if year and title.endswith(year):
        title = title[:-len(year)].strip()
    return normalizeTitle(title)
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_year(data):
    '''
      look for a four digit year enclosed in dots (".YYYY.") in data

      the lookup is done by ox.findRe with the four digits as group
      NOTE(review): presumably returns the digits of the first match
      or an empty string - confirm against ox.findRe
    '''
    return ox.findRe(data, r'\.(\d{4})\.')
|
2009-10-04 22:00:08 +00:00
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_series_title(path):
    '''
      get series title from path

      paths starting with "Series" use their basename as series title,
      for all other paths it is the part of oxdb_title(path) before
      " (S". returns an empty string if neither applies.
    '''
    if path.startswith('Series'):
        return os.path.basename(path)
    marker = " (S"
    item_title = oxdb_title(path)
    if marker in item_title:
        return item_title.split(marker)[0]
    return u''
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_episode_title(path):
    '''
      get episode title from path

      the episode title is the text between "Episode N." and the next
      "." that is followed by a letter, i.e. "Pilot" in
          Show.Season 1.Episode 2.Pilot.avi
      returns an empty string if path has no such part.
    '''
    match = re.search(r'.Episode \d+?\.(.*?)\.[a-zA-Z]', path)
    return match.group(1) if match else u''
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_season_episode(path):
    '''
      get season and episode number from the basename of path

      returns a tuple (season, episode) of ints, looking for
          Season N.Episode M
          Episode M            (season stays 0)
          SxxExx               (only if nothing else was found)
      numbers that can not be found are 0.
    '''
    season = 0
    episode = 0
    path = os.path.basename(path)
    se = re.findall(r'Season (\d+).Episode (\d+)', path)
    if se:
        season = int(se[0][0])
        episode = int(se[0][1])
    else:
        # findall with a single group returns a list of strings,
        # take the whole number, not just its first digit
        ep = re.findall(r'.Episode (\d+)', path)
        if ep:
            episode = int(ep[0])
    if season == 0 and episode == 0:
        se = re.findall(r'S(\d\d)E(\d\d)', path)
        if se:
            season = int(se[0][0])
            episode = int(se[0][1])
    return (season, episode)
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2009-10-04 22:00:08 +00:00
|
|
|
def oxdb_part(path):
    '''
      get part number from path

      looks (case insensitive) for "part N." and, if that is not
      found, for "cd N." in path.
      returns the number as found in the path (a string) or the
      int 1 if path has no part information.
    '''
    path = path.lower()
    # "part" has priority over "cd"
    for pattern in (r'part\s*?(\d+)\.', r'cd\s*?(\d+)\.'):
        found = re.findall(pattern, path)
        if found:
            return found[0]
    return 1
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2010-08-07 14:31:20 +00:00
|
|
|
def parse_path(path):
    '''
      expects path in the form
              L/Last, First/Title (YYYY)
              M/McCarthy, Thomas/The Visitor (2007)
              G/Godard, Jean-Luc/Histoire(s) du cinema_ Toutes les histoires (1988)

      returns a dict with the keys
              title, directors, year (only if path contains "(YYYY)"),
              episode_title, season, episode, series_title,
              imdbId, oxdbId
    '''
    # imported here and not at module level
    import ox.web.imdb
    search_title = oxdb_title(path, True)
    r = {}
    r['title'] = oxdb_title(path)
    r['directors'] = oxdb_directors(path)
    year = ox.findRe(path, r'\((\d{4})\)')
    if year:
        r['year'] = year

    #FIXME: only include it if it's actually a series
    r['episode_title'] = oxdb_episode_title(path)
    r['season'], r['episode'] = oxdb_season_episode(path)
    r['series_title'] = oxdb_series_title(path)

    r['imdbId'] = ox.web.imdb.guess(search_title, ', '.join(r['directors']), timeout=-1)
    r['oxdbId'] = oxdb_id(r['title'], r['directors'], r.get('year', ''),
                          r.get('season', ''), r.get('episode', ''),
                          episode_title=r['episode_title'],
                          episode_director=[],
                          episode_year='')
    return r
|
|
|
|
|
2011-01-01 11:44:42 +00:00
|
|
|
|
2011-01-03 19:45:56 +00:00
|
|
|
def sort_string(string):
    '''
      prepare string for sorting

      replaces "Þ" with "Th" and returns the NFKD normalized
      (compatibility decomposed) form of the result.
    '''
    without_thorn = string.replace(u'Þ', 'Th')
    return unicodedata.normalize('NFKD', without_thorn)
|
|
|
|
|
|
|
|
|
2010-09-03 13:28:44 +00:00
|
|
|
def sort_title(title):
    '''
      get the sort key for title

      punctuation is removed, the title is passed through sort_string
      and all numbers are zero padded to 10 digits, so that titles
      with numbers sort in numeric order.
      byte strings are decoded as utf-8 first.
    '''
    # decode before anything else, the expressions below expect text
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    # remove punctuation
    title = re.sub(u'[\'!¿¡,\\.;\\-"\\:\\*\\[\\]]', '', title)
    title = sort_string(title)
    # pad numbered titles
    title = re.sub(r'(\d+)', lambda m: '%010d' % int(m.group(0)), title)
    return title.strip()
|
2011-01-13 19:40:50 +00:00
|
|
|
|
|
|
|
def get_positions(ids, pos):
    '''
      get the index in ids of each element of pos

      returns a dict mapping each element of pos to the index of its
      first occurrence in ids, elements that are not in ids are left out.

      >>> get_positions([1,2,3,4], [2,4])
      {2: 1, 4: 3}
    '''
    positions = {}
    for i in pos:
        try:
            positions[i] = ids.index(i)
        except ValueError:
            # i is not in ids, skip it
            pass
    return positions
|