cablegates/pandora/item/utils.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
#
import errno
import os
import sys
import re
import hashlib
import unicodedata

import ox
import ox.iso
from ox.normalize import normalizeName, normalizeTitle, canonicalTitle


def plural_key(term):
    '''
      Return the plural form of term.

      Irregular plurals are looked up in a small table; every other term
      simply gets an "s" appended.
    '''
    irregular = {'country': 'countries'}
    if term in irregular:
        return irregular[term]
    return term + 's'

def oxid(title, directors, year='', seriesTitle='', episodeTitle='', season=0, episode=0):
    '''
      Compute the legacy "0x"-prefixed id of an item.

      For a plain item the id is the full SHA-1 hex digest of title,
      joined directors and year (newline separated). If seriesTitle is
      set, the id is instead made of two 20-character digest prefixes:
      one for series title + zero-padded season, and one for zero-padded
      episode + episode title + directors + year.
    '''
    def digest(fields):
        # SHA-1 hex digest of the newline-joined, UTF-8 encoded fields
        return hashlib.sha1(u"\n".join(fields).encode('utf-8')).hexdigest()

    director = ', '.join(directors)
    # always computed, even for series, exactly like the item-only case
    item_digest = digest([title, director, year])
    if not seriesTitle:
        return u"0x" + item_digest
    series_half = digest([seriesTitle, "%02d" % season])[:20]
    episode_half = digest(["%02d" % episode, episodeTitle, director, year])[:20]
    return u"0x" + series_half + episode_half

def oxdb_id(title, directors=[], year='', season='', episode='', episode_title='', episode_director='', episode_year=''):
    '''
      Compute the new-style "0x"-prefixed id (intended to replace oxid()).

      The id consists of two 8-character, upper-case SHA-1 prefixes.
      Without an episode they are derived from the joined directors and
      from title + year; with an episode they are derived from
      directors + title + year + season and from
      episode + episode director + episode title + episode year.
      All values are expected to be strings, since they are joined with
      newlines before hashing.
    '''
    def short_hash(fields):
        # first 8 upper-case hex characters of the SHA-1 of the joined fields
        joined = '\n'.join(fields)
        return hashlib.sha1(joined.encode('utf-8')).hexdigest().upper()[:8]

    director = ', '.join(directors)
    if episode:
        first = short_hash([director, title, year, season])
        second = short_hash([episode, episode_director, episode_title, episode_year])
    else:
        first = short_hash([director])
        second = short_hash([title, year])
    return u'0x' + first + second

def oxdb_directors(director):
    '''
      Extract the list of director names from a file path.

      The names are taken from the name of the directory that contains
      the file (despite its name, the director argument is a path).
      Several directors are separated by "; ". A trailing "_" in the
      directory name is turned into "." (presumably because a directory
      name cannot end with a dot -- TODO confirm).

      Each name is passed through normalizeName, the placeholder texts
      "Series", "Unknown Director" and "Various Directors" are removed,
      and names that end up empty are dropped.

      Always returns a real list, so the result can be iterated more
      than once (a bare filter() is a one-shot iterator on Python 3).
    '''
    folder = os.path.basename(os.path.dirname(director))
    if folder.endswith('_'):
        folder = "%s." % folder[:-1]
    placeholders = ('Series', 'Unknown Director', 'Various Directors')
    directors = []
    for name in folder.split('; '):
        name = normalizeName(name).strip()
        for placeholder in placeholders:
            name = name.replace(placeholder, '')
        if name:
            directors.append(name)
    return directors

def oxdb_title(_title, searchTitle = False):
    '''
      Normalize a filename to get the item title.

      Only the basename of _title is used. The title is the part before
      the first dot; dots that are followed or preceded by a space (and
      "... ") are considered part of the title and are protected with
      placeholders while splitting. A "_ " that follows a lowercase
      letter or digit is turned into ": ".

      If the name contains "Season N<any char>Episode M", the episode
      title is taken from the second to last dot separated component
      (or the third to last if the second to last is a "Part" component)
      and the result is 'Title (SxxEyy) Episode Title', or
      '"Title" Episode Title' if searchTitle is true.

      A "(YYYY)" at the end of the title is removed before the title is
      passed to normalizeTitle.
    '''
    _title = os.path.basename(_title)
    # protect dots that belong to the title from the split below
    _title = _title.replace('... ', '_dot_dot_dot_')
    _title = _title.replace('. ', '_dot__space_')
    _title = _title.replace(' .', '_space__dot_')
    parts = _title.split('.')
    title = parts[0]
    title = re.sub('([a-z0-9])_ ', '\\1: ', title)
    se = re.compile(r'Season (\d+).Episode (\d+)').findall(_title)
    if se:
        se = "S%02dE%02d" % (int(se[0][0]), int(se[0][1]))
        # the length checks keep short names from raising IndexError
        if len(parts) >= 3 and 'Part' in parts[-2] and 'Episode' not in parts[-3]:
            stitle = parts[-3]
        elif len(parts) >= 2:
            stitle = parts[-2]
        else:
            stitle = ''
        # "Episode N" is a number, not an episode title
        if stitle.startswith('Episode '):
            stitle = ''
        if searchTitle:
            title = '"%s" %s' % (title, stitle)
        else:
            title = '%s (%s) %s' % (title, se, stitle)
            title = title.strip()
    # restore the protected dots
    title = title.replace('_dot_dot_dot_', '... ')
    title = title.replace('_dot__space_', '. ')
    title = title.replace('_space__dot_', ' .')
    year = ox.findRe(title, r'(\(\d{4}\))')
    # Only strip if a year was actually found: title.endswith('') is always
    # true and title[:-0] is the empty string, which would erase the title.
    if year and title.endswith(year):
        title = title[:-len(year)].strip()
    title = normalizeTitle(title)
    return title

def oxdb_year(data):
    '''
      Look for a four digit number enclosed in dots (".YYYY.") in data
      and return whatever ox.findRe yields for its digits.
    '''
    year_between_dots = r'\.(\d{4})\.'
    return ox.findRe(data, year_between_dots)

def oxdb_series_title(path):
    '''
      Return the series title for path, or u'' if there is none.

      For paths that start with "Series" the name of the directory
      containing the file is used. Otherwise the title derived by
      oxdb_title is used up to its first " (S" (the start of the
      "(SxxEyy)" marker), if it has one.
    '''
    if path.startswith('Series'):
        return os.path.basename(os.path.dirname(path))
    marker = " (S"
    parsed = oxdb_title(path)
    if marker not in parsed:
        return u''
    return parsed.split(marker)[0]

def oxdb_episode_title(path):
    '''
      Return the episode title found in path, or u'' if there is none.

      The episode title is the text between "Episode N." (preceded by
      any one character) and the next dot that is followed by a letter.
    '''
    found = re.search(r'.Episode \d+?\.(.*?)\.[a-zA-Z]', path)
    if found is None:
        return u''
    return found.group(1)

def oxdb_season_episode(path):
    '''
      Return a (season, episode) tuple of ints parsed from the basename
      of path; values that can not be found are 0.

      Recognized forms, in order of preference:
        - "Season N<any char>Episode M"
        - "<any char>Episode M" (season stays 0)
        - "SnnEmm" (two digits each), only tried if both values are
          still 0
    '''
    season = 0
    episode = 0
    path = os.path.basename(path)
    se = re.search(r'Season (\d+).Episode (\d+)', path)
    if se:
        season = int(se.group(1))
        episode = int(se.group(2))
    else:
        # Greedy match of the whole number: a lazy quantifier with nothing
        # after it would only ever capture the first digit ("12" -> 1).
        ep = re.search(r'.Episode (\d+)', path)
        if ep:
            episode = int(ep.group(1))
    if season == 0 and episode == 0:
        se = re.search(r'S(\d\d)E(\d\d)', path)
        if se:
            season = int(se.group(1))
            episode = int(se.group(2))
    return (season, episode)

def oxdb_part(path):
    '''
      Return the part number of a multi part file.

      The lowercased path is searched for "part N." and, if that is not
      found, for "cd N." (whitespace before the number is optional).
      NOTE: a number that was found is returned as the matched string,
      while the default for paths without a part number is the int 1.
    '''
    lowered = path.lower()
    for pattern in (r'part\s*?(\d+)\.', r'cd\s*?(\d+)\.'):
        found = re.search(pattern, lowered)
        if found:
            return found.group(1)
    return 1

def parse_path(path):
    '''
      Collect the metadata that can be derived from the path of a file.

      Returns a dict with the keys title, directors, episode_title,
      season, episode, series_title, imdbId and oxdbId, plus year if a
      "(YYYY)" is found in path. imdbId is whatever ox.web.imdb.guess
      returns for the search title and the joined directors. oxdbId is
      computed by oxid(); note that the year is not passed on to it.
    '''
    # imported here and not at module level; this also binds ox locally
    import ox.web.imdb

    info = {
        'title': oxdb_title(path),
        'directors': oxdb_directors(path),
    }
    path_year = ox.findRe(path, r'\((\d{4})\)')
    if path_year:
        info['year'] = path_year

    #FIXME: only include these if it is actually a series
    season, episode = oxdb_season_episode(path)
    info['episode_title'] = oxdb_episode_title(path)
    info['season'] = season
    info['episode'] = episode
    info['series_title'] = oxdb_series_title(path)

    # the search title uses the quoted form produced by searchTitle=True
    query = oxdb_title(path, True)
    director_names = ', '.join(info['directors'])
    info['imdbId'] = ox.web.imdb.guess(query, director_names, timeout=-1)
    info['oxdbId'] = oxid(
        info['title'],
        info['directors'],
        seriesTitle=info['series_title'],
        episodeTitle=info['episode_title'],
        season=info['season'],
        episode=info['episode'],
    )
    return info

def sort_title(title):
    '''
      Return a normalized form of title that can be used as sort value.

      Byte strings are decoded as UTF-8 first. Then the punctuation
      characters ' ! ¿ ¡ , . ; - " : * [ ] are removed, the text is
      NFKD normalized, every run of digits is zero padded to 10 digits
      (so that numbers sort by value) and surrounding whitespace is
      stripped.
    '''
    # Decode before anything else, so that the unicode pattern below is
    # matched against text and not against raw bytes. bytes is an alias
    # of str on Python 2, so this works on both major versions.
    if isinstance(title, bytes):
        title = title.decode('utf-8')

    #title
    title = re.sub(u'[\'!¿¡,\.;\-"\:\*\[\]]', '', title)

    # NOTE: NFKD does not decompose letters like u'Æ', they are kept as is
    title = unicodedata.normalize('NFKD', title)

    #pad numbered titles
    title = re.sub(r'(\d+)', lambda x: '%010d' % int(x.group(0)), title)
    return title.strip()
foo 2009-06-08 16:08:59 +00:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`
			`# vi:si:et:sw=4:sts=4:ts=4`
			`#`
			`import errno`
			`import os`
			`import sys`
			`import re`
			`import hashlib`
use unicodedata.normalize for sort_title 2010-11-14 18:58:33 +00:00			`import unicodedata`
foo 2009-06-08 16:08:59 +00:00
oxlib->ox, oxweb->ox.web 2010-07-07 22:46:41 +00:00			`import ox`
			`import ox.iso`
stream, videosupport 2010-09-03 13:28:44 +00:00			`from ox.normalize import normalizeName, normalizeTitle, canonicalTitle`
foo 2009-06-08 16:08:59 +00:00
uploads, transcodes, /ra 2010-08-24 17:16:33 +00:00
			`def plural_key(term):`
			`return {`
			`'country': 'countries',`
			`}.get(term, term + 's')`

- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`def oxid(title, directors, year='', seriesTitle='', episodeTitle='', season=0, episode=0):`
			`director = ', '.join(directors)`
foo 2009-06-08 16:08:59 +00:00			`oxid_value = u"\n".join([title, director, year])`
			`oxid = hashlib.sha1(oxid_value.encode('utf-8')).hexdigest()`
			`if seriesTitle:`
			`oxid_value = u"\n".join([seriesTitle, "%02d" % season])`
			`oxid = hashlib.sha1(oxid_value.encode('utf-8')).hexdigest()[:20]`
			`oxid_value = u"\n".join(["%02d" % episode, episodeTitle, director, year])`
			`oxid += hashlib.sha1(oxid_value.encode('utf-8')).hexdigest()[:20]`
			`return u"0x" + oxid`

adding new 0xDB Id function 2010-09-17 14:11:37 +00:00			`def oxdb_id(title, directors=[], year='', season='', episode='', episode_title='', episode_director='', episode_year=''):`
			`# new id function, will replace oxid()`
			`def get_hash(string):`
			`return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()`
			`director = ', '.join(directors)`
			`if not episode:`
			`oxdb_id = get_hash(director)[:8] + get_hash('\n'.join([title, year]))[:8]`
			`else:`
			`oxdb_id = get_hash('\n'.join([director, title, year, season]))[:8] + \`
			`get_hash('\n'.join([episode, episode_director, episode_title, episode_year]))[:8]`
			`return u'0x' + oxdb_id`

- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`def oxdb_directors(director):`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`director = os.path.basename(os.path.dirname(director))`
			`if director.endswith('_'):`
			`director = "%s." % director[:-1]`
- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`directors = [normalizeName(d) for d in director.split('; ')]`
			`def cleanup(director):`
			`director = director.strip()`
			`director = director.replace('Series', '')`
			`director = director.replace('Unknown Director', '')`
			`director = director.replace('Various Directors', '')`
			`return director`
			`directors = filter(None, [cleanup(d) for d in directors])`
			`return directors`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00
			`def oxdb_title(_title, searchTitle = False):`
			`'''`
rename Movie to Item 2010-09-23 16:01:48 +00:00			`normalize filename to get item title`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`'''`
			`_title = os.path.basename(_title)`
			`_title = _title.replace('... ', '_dot_dot_dot_')`
			`_title = _title.replace('. ', '_dot__space_')`
			`_title = _title.replace(' .', '_space__dot_')`
			`title = _title.split('.')[0]`
			`title = re.sub('([a-z0-9])_ ', '\\1: ', title)`
			`se = re.compile('Season (\d+).Episode (\d+)').findall(_title)`
			`if se:`
			`se = "S%02dE%02d" % (int(se[0][0]), int(se[0][1]))`
			`if 'Part' in _title.split('.')[-2] and 'Episode' not in _title.split('.')[-3]:`
			`stitle = _title.split('.')[-3]`
			`else:`
			`stitle = _title.split('.')[-2]`
			`if stitle.startswith('Episode '):`
			`stitle = ''`
			`if searchTitle:`
			`title = '"%s" %s' % (title, stitle)`
			`else:`
			`title = '%s (%s) %s' % (title, se, stitle)`
			`title = title.strip()`
			`title = title.replace('_dot_dot_dot_', '... ')`
			`title = title.replace('_dot__space_', '. ')`
			`title = title.replace('_space__dot_', ' .')`
update files archive api 2010-08-07 14:31:20 +00:00			`year = ox.findRe(title, '(\(\d{4}\))')`
			`if title.endswith(year):`
			`title = title[:-len(year)].strip()`
			`title = normalizeTitle(title)`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`return title`

			`def oxdb_year(data):`
oxlib->ox, oxweb->ox.web 2010-07-07 22:46:41 +00:00			`return ox.findRe(data, '\.(\d{4})\.')`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00
			`def oxdb_series_title(path):`
			`seriesTitle = u''`
			`if path.startswith('Series'):`
			`seriesTitle = os.path.basename(os.path.dirname(path))`
			`else:`
			`t = oxdb_title(path)`
			`if " (S" in t:`
			`seriesTitle = t.split(" (S")[0]`
			`return seriesTitle`

			`def oxdb_episode_title(path):`
			`episodeTitle = u''`
			`ep = re.compile('.Episode \d+?\.(.*?)\.[a-zA-Z]').findall(path)`
			`if ep:`
file import cleanup 2010-01-22 23:57:06 +00:00			`episodeTitle = ep[0]`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`return episodeTitle`

			`def oxdb_season_episode(path):`
			`season = 0`
			`episode = 0`
			`path = os.path.basename(path)`
			`se = re.compile('Season (\d+).Episode (\d+)').findall(path)`
			`if se:`
			`season = int(se[0][0])`
			`episode = int(se[0][1])`
			`else:`
			`ep = re.compile('.Episode (\d+?)').findall(path)`
			`if ep:`
			`episode = int(ep[0][0])`
			`if season == 0 and episode == 0:`
			`se = re.compile('S(\d\d)E(\d\d)').findall(path)`
			`if se:`
			`season = int(se[0][0])`
			`episode = int(se[0][1])`
			`return (season, episode)`

			`def oxdb_part(path):`
			`part = 1`
			`path = path.lower()`
			`p = re.compile('part\s*?(\d+)\.').findall(path)`
			`if p:`
			`part = p[0]`
			`else:`
			`p = re.compile('cd\s*?(\d+)\.').findall(path)`
			`if p:`
			`part = p[0]`
			`return part`

update files archive api 2010-08-07 14:31:20 +00:00			`def parse_path(path):`
oxlib->ox, oxweb->ox.web 2010-07-07 22:46:41 +00:00			`import ox.web.imdb`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`search_title = oxdb_title(path, True)`
			`r = {}`
			`r['title'] = oxdb_title(path)`
- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`r['directors'] = oxdb_directors(path)`
update files archive api 2010-08-07 14:31:20 +00:00			`year = ox.findRe(path, '\((\d{4})\)')`
			`if year:`
			`r['year'] = year`

			`#FIXME: only include it its actually a series`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`r['episode_title'] = oxdb_episode_title(path)`
			`r['season'], r['episode'] = oxdb_season_episode(path)`
- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`r['series_title'] = oxdb_series_title(path)`
update files archive api 2010-08-07 14:31:20 +00:00
- simplify metadata db structure - better sort values if field is empty - switch to python-ox, faster imdb import - move files backend into own app 2010-07-12 14:56:14 +00:00			`r['imdbId'] = ox.web.imdb.guess(search_title, ', '.join(r['directors']), timeout=-1)`
			`r['oxdbId'] = oxid(r['title'], r['directors'],`
			`seriesTitle=r['series_title'],`
			`episodeTitle=r['episode_title'],`
			`season=r['season'], episode=r['episode'])`
testing interface, more work on backend 2009-10-04 22:00:08 +00:00			`return r`

stream, videosupport 2010-09-03 13:28:44 +00:00			`def sort_title(title):`
			`#title`
			`title = re.sub(u'[\'!¿¡,\.;\-"\:\*\[\]]', '', title)`
use unicodedata.normalize for sort_title 2010-11-14 18:58:33 +00:00
			`#title = title.replace(u'Æ', 'Ae')`
unicode 2010-11-26 17:16:57 +00:00			`if isinstance(title, str):`
			`title = unicode(title)`
use unicodedata.normalize for sort_title 2010-11-14 18:58:33 +00:00			`title = unicodedata.normalize('NFKD',title)`

stream, videosupport 2010-09-03 13:28:44 +00:00			`#pad numbered titles`
			`title = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), title)`
			`return title.strip()`