2011-10-13 10:21:16 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2012-08-15 18:57:04 +00:00
# GPL 2012
2011-10-13 10:21:16 +00:00
from __future__ import division
2011-10-13 13:08:40 +00:00
import hashlib
2012-08-15 18:57:04 +00:00
import os
import re
2011-10-13 10:21:16 +00:00
from normalize import normalizeName
from text import get_sort_name , findRe
2011-10-13 13:08:40 +00:00
__all__ = [ ' parse_movie_path ' , ' create_movie_path ' , ' get_oxid ' ]
2011-10-13 10:21:16 +00:00
2012-08-16 14:21:15 +00:00
'''
Naming scheme:
X/[Group, The; Lastname, Firstname/]The Title[ (YEAR[-YEAR])]/
The Title[ ([SXX][EYY])[ Episode Title]][.Version][.Part XY[.Part Title]][.en].ext
'''
2012-08-16 13:44:57 +00:00
def format_path(data, has_director_directory=True):
    '''
    Build the canonical relative path for a parsed file.

    data is a dict with the keys written by parse_path; the keys read here
    are 'isEpisode', 'directorSort' / 'seriesDirectorSort', 'title' /
    'seriesTitle', 'year' / 'seriesYear', 'directory', 'version', 'part',
    'partTitle', 'language', 'extension' and 'subdirectory'.

    If has_director_directory is false, the director directory is omitted
    and the first component is the first character of the title instead of
    data['directory'] / the first character of the first director.

    Returns the path components joined with '/'.
    '''
    def format_underscores(string):
        # a leading dot, a trailing dot and every '/' or ':' become '_'
        return re.sub(r'^\.|\.$|/|:', '_', string)

    is_episode = data['isEpisode']
    # episodes are filed under the series' director, title and year
    director = data['seriesDirectorSort' if is_episode else 'directorSort'] or ['Unknown Director']
    title = data['seriesTitle' if is_episode else 'title'] or 'Untitled'
    year = data['seriesYear' if is_episode else 'year']

    if has_director_directory:
        first_component = data['directory'] or director[0][0]
        director_component = '; '.join(director)
    else:
        first_component = title[0]
        director_component = None

    filename = '%s%s%s%s%s%s' % (
        data['title'] or 'Untitled',
        '.%s' % data['version'] if data['version'] else '',
        '.Part %s' % data['part'] if data['part'] else '',
        '.%s' % data['partTitle'] if data['partTitle'] else '',
        # 'en' is the implied default language and is not written out
        '.%s' % data['language'].replace('/', '.') if data['language'] not in (None, 'en') else '',
        '.%s' % data['extension'] if data['extension'] else '',
    )
    components = [
        first_component,
        director_component,
        '%s%s' % (title, ' (%s)' % year if year else ''),
        filename,
    ]
    # a real list (not a lazy map object) so that insert() below also works
    # on Python 3
    parts = [
        format_underscores(component)
        for component in components
        if component is not None
    ]
    if data['subdirectory']:
        # the subdirectory sits between the title directory and the file and
        # is inserted verbatim (it may itself contain '/')
        parts.insert(-1, data['subdirectory'])
    return '/'.join(parts)
2012-08-15 18:57:04 +00:00
def parse_path(path):
    '''
    Parse a relative file path that follows the naming scheme into a dict.

    The returned dict has the keys 'directory', 'subdirectory', 'director',
    'directorSort', 'title', 'year', 'season', 'episode', 'episodeTitle',
    'isEpisode', 'seriesDirector', 'seriesDirectorSort', 'seriesTitle',
    'seriesYear', 'version', 'part', 'partTitle', 'language', 'extension',
    'type' and 'path' (the normalized path as built by format_path).

    # all keys
    >>> parse_path('F/Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.MPEG')['path']
    'F/Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.mpg'

    # pop directory title off file name
    >>> parse_path("U/Unknown Director/www.xxx.com.._/www.xxx.com....Director's Cut.avi")['version']
    "Director's Cut"

    # handle dots
    >>> parse_path("U/Unknown Director/Unknown Title (2000)/... Mr. .com....Director's Cut.srt")['version']
    "Director's Cut"

    # handle underscores
    >>> parse_path('U/Unknown Director/_com_ 1_0 _ NaN.._/_com_ 1_0 _ NaN....avi')['title']
    '.com: 1/0 / NaN...'
    '''
    def parse_series(string):
        # ' (SxxEyy) Episode Title'; season, episode and title are optional
        match = re.search(r' \((S\d{2})?(E\d{2}([+-]\d{2})?)?\)(.+)?', string)
        season = int(match.group(1)[1:]) if match and match.group(1) else None
        # [1:3] keeps the first episode number of a range such as E01+02
        episode = int(match.group(2)[1:3]) if match and match.group(2) else None
        # [1:] drops the space between ')' and the episode title
        episode_title = match.group(4)[1:] if match and match.group(4) else None
        return season, episode, episode_title

    def parse_title(string):
        # a trailing ' (YEAR)' or ' (YEAR-YEAR)' is split off the title
        match = re.search(r' \(\d{4}(-\d{4})?\)$', string)
        if not match:
            return string, None
        # [2:-1] strips the leading ' (' and the trailing ')'
        return string[:-len(match.group(0))], match.group(0)[2:-1]

    def parse_type(string):
        if string in ('aac', 'm4a', 'mp3', 'ogg'):
            return 'audio'
        if string in ('idx', 'srt', 'sub'):
            return 'subtitle'
        if string in ('avi', 'divx', 'm4v', 'mkv', 'mov', 'mpg', 'ogv', 'rm'):
            return 'video'
        return None

    def parse_underscores(string):
        # undo the '_' escaping applied by format_path; order matters
        string = re.sub(r'^_', '.', string)
        string = re.sub(r'_$', '.', string)
        string = re.sub(r'(?<=\w)_(?=\w)', '/', string)
        string = re.sub(r' _ ', ' / ', string)
        string = re.sub(r'(?<=\w)_ ', ': ', string)
        return string

    def is_capitalized(string):
        # version and part title fields start with an upper-case letter or digit
        return re.search(r'^[A-Z0-9]', string) is not None

    def is_part(string):
        return re.search(r'^Part .', string) is not None

    data = {}
    # a real list (not a lazy map object) so that len() and slicing also
    # work on Python 3
    parts = [parse_underscores(part) for part in path.split('/')]
    # subdirectory: everything between the title directory and the file
    if len(parts) > 4:
        data['subdirectory'] = '/'.join(parts[3:-1])
        parts = parts[:3] + parts[-1:]
    else:
        data['subdirectory'] = None
    length = len(parts)
    # directory
    data['directory'] = parts[0] if length > 2 else None
    director = parts[1] if length == 4 else None
    title = parts[-2] if length > 1 else None
    filename = parts[-1]
    # directorSort, director
    if director:
        data['directorSort'] = [
            name for name in director.split('; ')
            if name != 'Unknown Director'
        ]
        # 'Lastname, Firstname' -> 'Firstname Lastname'
        data['director'] = [
            ' '.join(reversed(name.split(', ')))
            for name in data['directorSort']
        ]
    else:
        data['directorSort'] = []
        data['director'] = []
    # title, year
    if title:
        data['title'], data['year'] = parse_title(title)
        file_title = re.sub(r'^\.|/|:', '_', data['title'])
        # Pop the directory title off the file name, so that dots inside the
        # title are not mistaken for field separators below. (The result
        # used to be assigned to `title`, which was overwritten right away.)
        # NOTE(review): filename has already been passed through
        # parse_underscores, so a title containing '/' or ':' or a leading
        # '.' will not match here -- confirm whether the unescaped title
        # should be compared instead.
        filename = re.sub('^' + re.escape(file_title), '', filename)
    else:
        data['title'] = data['year'] = None
    # fields are separated by a dot that is not preceded by whitespace and
    # is followed by a word character
    parts = re.split(r'(?<!\s)\.(?=\w)', filename)
    title = parts[0]
    extension = parts[-1] if len(parts) > 1 else None
    parts = parts[1:-1]
    if not data['title'] and title:
        data['title'] = title
    # season, episode, episodeTitle
    data['season'], data['episode'], data['episodeTitle'] = parse_series(title)
    # isEpisode, seriesDirector, seriesDirectorSort, seriesTitle, seriesYear
    if data['season'] or data['episode']:
        data['isEpisode'] = True
        # director, title and year found in the path belong to the series
        data['seriesDirector'] = data['director']
        data['director'] = []
        data['seriesDirectorSort'] = data['directorSort']
        data['directorSort'] = []
        data['seriesTitle'] = data['title']
        data['title'] = '%s (%s%s)%s' % (
            data['title'],
            'S%02d' % data['season'] if data['season'] else '',
            'E%02d' % data['episode'] if data['episode'] else '',
            ' %s' % data['episodeTitle'] if data['episodeTitle'] else ''
        )
        data['seriesYear'] = data['year']
        data['year'] = None
    else:
        data['isEpisode'] = False
        data['seriesDirector'] = []
        data['seriesDirectorSort'] = []
        data['seriesTitle'] = data['seriesYear'] = None
    # version
    if parts and is_capitalized(parts[0]) and not is_part(parts[0]):
        data['version'] = parts.pop(0)
    else:
        data['version'] = None
    # part ([5:] strips the leading 'Part ')
    if parts and is_part(parts[0]):
        data['part'] = parts.pop(0)[5:]
    else:
        data['part'] = None
    # partTitle (only recognized directly after a part)
    if parts and is_capitalized(parts[0]) and data['part']:
        data['partTitle'] = parts.pop(0)
    else:
        data['partTitle'] = None
    # language: consecutive two-letter lower-case fields, joined with '/'
    data['language'] = None
    while parts and re.search(r'^[a-z]{2}$', parts[0]):
        code = parts.pop(0)
        if data['language']:
            data['language'] = '%s/%s' % (data['language'], code)
        else:
            data['language'] = code
    # extension
    data['extension'] = re.sub(r'^mpeg$', 'mpg', extension.lower()) if extension else None
    # type
    data['type'] = parse_type(data['extension'])
    if data['type'] == 'subtitle' and not data['language']:
        # subtitles without a language code default to English
        data['language'] = 'en'
    # path
    data['path'] = format_path(data)
    return data
2011-10-13 10:21:16 +00:00
def parse_movie_path(path):
    """
    Parse a movie path of one of the following forms into a dict:

    "A/Abrams, J.J.; Lieber, Jeffrey; Lindelof, Damon/Lost (2004)/Lost.Season 3.Episode 21.Greatest Hits.avi"
    "B/Balada, Ivan/Metrum (1967)/Metrum.Part 1.en.srt"
    "N/Nakata, Hideo/L - Change the World (2008)/L - Change the World.Part 2.srt"
    "R/Reitz, Edgar/Heimat (1984-2006)/Heimat.Season 2.Episode 8.The Wedding.Part 2.avi"
    "F/Feuillade, Louis/Les vampires (1915)/Les vampires.Episode 10.Part 2.avi"
        title: 'Les vampires', year: '1915', episode: 10, part: 2
    "G/Godard, Jean-Luc/Histoire(s) du cinema_ Toutes les histoires (1988)/Histoire(s) du cinema_ Toutes les histoires.avi"
    "G/Godard, Jean-Luc/Six fois deux (1976)/Six fois deux.Part 1A.Y a personne.avi"
    "G/Godard, Jean-Luc; Miéville, Anne-Marie/France_tour_detour_deux_enfants (1977)/France_tour_detour_deux_enfants.Part 5.Impression_Dictée.avi"
    "L/Labarthe, André S_/Cinéastes de notre temps (1964-)/Cinéastes de notre temps.Episode.Jean Renoir le patron, première partie_ La Recherche du relatif.avi"
    "S/Scott, Ridley/Blade Runner (1982)/Blade Runner.Director's Cut.avi"

    or

    T/Title (Year)/Title.avi

    Returns a dict with the keys 'director', 'episodeDirector', 'episode',
    'episodeTitle', 'episodeYear', 'extension', 'language', 'part',
    'season', 'seriesTitle', 'title' and 'year'. 'episodeDirector' is always
    [] and 'episodeYear' is always None; 'part' is 0 if there is no numeric
    part.

    NOTE(review): findRe and normalizeName come from the project's text and
    normalize modules; findRe is assumed to return the first match (the
    group, if the pattern has one) or a false value -- confirm.
    """
    episodeTitle = episodeYear = seriesTitle = None
    episodeDirector = []
    parts = path.split('/')
    filename = parts[-1]

    #title/year
    if len(parts) == 4:
        title = parts[2]
    elif len(parts) > 1:
        title = parts[1]
    else:
        title = parts[0]
    # '_ ' stands for ': ' and a trailing '_' for a trailing '.'
    title = title.replace('_ ', ': ')
    if title.endswith('_'):
        title = title[:-1] + '.'

    year = findRe(title, r'(\(\d{4}\))')
    if not year:
        year = findRe(title, r'(\(\d{4}-\d*\))')
    if year and title.endswith(year):
        title = title[:-len(year)].strip()
        year = year[1:-1]
        if '-' in year:
            # for a range, keep the first year only
            year = findRe(year, r'\d{4}')

    #director
    if len(parts) == 4:
        director = parts[1]
        if director.endswith('_'):
            director = "%s." % director[:-1]
        # a list (not a lazy filter object), also on Python 3
        director = [
            name for name in (
                normalizeName(d).strip() for d in director.split('; ')
            )
            if name not in ('Unknown Director', 'Various Directors')
        ]
    else:
        director = []

    #extension/language
    # '. ' (dot followed by a space) is not a field separator, so it is
    # masked while splitting on '.'
    fileparts = [
        x.replace('||', '. ')
        for x in filename.replace('. ', '||').split('.')
    ]
    extension = (fileparts[-1] if len(fileparts) > 1 else '') or ''
    if len(fileparts) > 1 and len(fileparts[-2]) == 2:
        language = fileparts[-2]
    else:
        language = ''

    def title_after(marker):
        # The field following marker, or None if there is no such field or
        # if it is the extension or a 'Part ...' field. The bounds check
        # avoids an IndexError when marker is the last field.
        index = fileparts.index(marker) + 1
        if index >= len(fileparts):
            return None
        candidate = fileparts[index]
        if candidate == extension or candidate.startswith('Part'):
            return None
        return candidate

    #season/episode/episodeTitle
    season = findRe(filename, r'\.Season (\d+)\.')
    season = int(season) if season else None
    episode = findRe(filename, r'\.Episode (\d+)\.')
    episode = int(episode) if episode else None

    if episode and 'Episode %d' % episode in fileparts:
        episodeTitle = title_after('Episode %d' % episode)
    if not season and 'Episode' in fileparts:
        # unnumbered episode: if it has a title, file it under season 1
        episodeTitle = title_after('Episode')
        if episodeTitle is not None:
            season = 1

    if season:
        seriesTitle = title
        title = u'%s (S%02d)' % (seriesTitle, season)
        if isinstance(episode, int):
            title = u'%s (S%02dE%02d)' % (seriesTitle, season, episode)
        if episodeTitle:
            title = u'%s %s' % (title, episodeTitle)

    #part
    part = findRe(filename, r'\.Part (\d+)\.')
    part = int(part) if part else 0

    return {
        'director': director,
        'episodeDirector': episodeDirector,
        'episode': episode,
        'episodeTitle': episodeTitle,
        'episodeYear': episodeYear,
        'extension': extension,
        'language': language,
        'part': part,
        'season': season,
        'seriesTitle': seriesTitle,
        'title': title,
        'year': year,
    }
def create_movie_path(title, director, year,
                      season, episode, episodeTitle, episodeDirector, episodeYear,
                      part, language, extension):
    '''
    Build '<X>/<directors>/<title> (<year>)/<file name>' for a movie file.

    {
        title: '', director: [''], year: '',
        season: int, episode: int, episodeTitle: '', episodeDirector: [''], episodeYear: '',
        part: int, language: '', extension: ''
    }

    The directors are converted with get_sort_name and joined with '; ';
    <X> is the first character of that string. If no director is given,
    'Unknown Director' is used (the same fallback as in format_path)
    instead of failing on the empty string.

    The file name consists of the title followed by the optional
    'Season N', 'Episode N', episode title, 'Part N' and extension fields,
    joined with '.'.

    episodeDirector, episodeYear and language are accepted for interface
    compatibility but do not influence the returned path.
    '''
    director = '; '.join([get_sort_name(d) for d in (director or [])])
    if not director:
        # director[0] below would raise an IndexError on an empty string
        director = 'Unknown Director'
    filename = [title]
    if season:
        filename.append('Season %d' % season)
    if episode:
        filename.append('Episode %d' % episode)
    if episodeTitle:
        filename.append(episodeTitle)
    if part:
        filename.append('Part %s' % part)
    if extension:
        filename.append(extension)
    filename = '.'.join(filename)
    return os.path.join(director[0], director, '%s (%s)' % (title, year), filename)
2011-10-13 13:08:40 +00:00
def get_oxid(title, director=None, year='',
             season='', episode='', episode_title='', episode_director=None, episode_year=''):
    '''
    Return the id ('0x' followed by 16 upper-case hex digits) of a movie or
    episode.

    director and episode_director are sequences of names (default: empty).
    Without episode and episode_title, the id is made of the first 8 hex
    digits of the SHA-1 of the directors (joined with ', ') and the first
    8 hex digits of the SHA-1 of title and year (joined with a newline).
    Otherwise, the first half hashes director, title, year and season, and
    the second half hashes episode, episode director, episode title and
    episode year.
    '''
    def get_hash(string):
        # byte strings are hashed as they are, text is encoded as UTF-8
        if not isinstance(string, bytes):
            string = string.encode('utf-8')
        return hashlib.sha1(string).hexdigest().upper()

    # None instead of a mutable [] default; a missing value means no names
    director = ', '.join(director or ())
    episode_director = ', '.join(episode_director or ())
    if not episode and not episode_title:
        first = get_hash(director)
        second = get_hash('\n'.join([title, str(year)]))
    else:
        first = get_hash('\n'.join([director, title, str(year), str(season)]))
        second = get_hash('\n'.join([
            str(episode), episode_director, episode_title, str(episode_year)
        ]))
    return u'0x' + first[:8] + second[:8]