2011-10-13 10:21:16 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2012-08-15 18:57:04 +00:00
# GPL 2012
2011-10-13 10:21:16 +00:00
from __future__ import division
2011-10-13 13:08:40 +00:00
import hashlib
2012-08-15 18:57:04 +00:00
import os
import re
2012-09-11 12:36:51 +00:00
import unicodedata
2011-10-13 10:21:16 +00:00
2012-08-14 14:12:43 +00:00
from normalize import normalize_name
from text import get_sort_name , find_re
2011-10-13 10:21:16 +00:00
2011-10-13 13:08:40 +00:00
# Names exported by ``from <module> import *``; they must match the function
# names defined in this module exactly (no surrounding whitespace).
__all__ = ['parse_movie_path', 'create_movie_path', 'get_oxid']
2011-10-13 10:21:16 +00:00
2012-08-20 17:42:03 +00:00
# File extensions (lower case, without the leading dot) recognized for each
# media type; parse_path looks up a file's type in this table.
EXTENSIONS = {
    'audio': [
        'aac', 'flac', 'm4a', 'mp3', 'oga', 'ogg', 'wav', 'wma'
    ],
    'subtitle': [
        'idx', 'srt', 'sub'
    ],
    'video': [
        'avi', 'divx', 'dv', 'flv', 'm2t', 'm4v', 'mkv', 'mov', 'mp4',
        'mpeg', 'mpg', 'mts', 'ogm', 'ogv', 'rm', 'vob', 'webm', 'wmv'
    ],
}

# Two-letter subtitle language codes in order of preference; the first entry
# is used as the default language for subtitle files.
LANGUAGES = ['en', 'fr', 'de', 'es', 'it']
2012-08-16 16:57:57 +00:00
2012-08-16 14:21:15 +00:00
'''
Naming scheme:
X/[Group, The; Lastname, Firstname/]The Title[ (YEAR[-[YEAR]])]/
The Title[ ([SXX][EYY[+ZZ|-ZZ]])[ Episode Title]][.Version][.Part XY[.Part Title]][.en][.fr].xyz
'''
2012-09-11 15:55:35 +00:00
def format_path(data, directory_key='director'):
    '''
    Build the normalized relative path for a parsed file.

    data is a dict shaped like the result of parse_path; the keys read here
    are 'directorSort', 'isEpisode', 'title', 'year', 'seriesTitle',
    'seriesYear', 'version', 'part', 'partTitle', 'language', 'extension'
    and (optionally) 'subdirectory'.

    directory_key is accepted for interface compatibility but is currently
    not used: the directory name is always built from 'directorSort'.

    Returns an NFD-normalized unicode string of the form
    'Directors/Title (Year)/[Subdirectory/]Title.Version.Part N.Part Title.lang.ext',
    where missing components are left out and a missing director or title
    becomes 'Unknown Director' or 'Untitled'.
    '''
    def format_underscores(string):
        # a leading or trailing dot and the characters : / ? < > become '_'
        return re.sub(r'^\.|\.$|:|/|\?|<|>', '_', string)

    # list() so that a lazy (Python 3) filter/map result can be sliced and
    # so that an empty result correctly falls back to the placeholder
    director = list(data['directorSort'] or []) or ['Unknown Director']
    # episodes live in the directory of their series
    title = data['seriesTitle' if data['isEpisode'] else 'title'] or 'Untitled'
    year = data['seriesYear' if data['isEpisode'] else 'year'] or None
    filename = u'%s%s%s%s%s%s' % (
        data['title'] or 'Untitled',
        u'.%s' % data['version'] if data['version'] else '',
        u'.Part %s' % data['part'] if data['part'] else '',
        u'.%s' % data['partTitle'] if data['partTitle'] else '',
        u'.%s' % data['language'] if data['language'] else '',
        u'.%s' % data['extension'] if data['extension'] else ''
    )
    # a real list (not a map object), since a subdirectory may be inserted
    parts = [format_underscores(part) for part in (
        # at most ten directors are used for the directory name
        u'; '.join(director[:10]),
        u'%s%s' % (title, u' (%s)' % year if year else ''),
        filename
    )]
    if data.get('subdirectory'):
        # the subdirectory goes between the title directory and the file
        # name and is inserted as is (it is not passed through
        # format_underscores)
        parts.insert(-1, data['subdirectory'])
    return unicodedata.normalize('NFD', u'/'.join(parts))
2012-08-20 16:19:17 +00:00
2012-09-01 03:27:59 +00:00
2012-08-21 22:32:35 +00:00
def parse_item_files(files):
    '''
    Group the files of one item into versions and pick the main files.

    files is a list of file dicts as returned by parse_path, each extended
    with 'path' and 'time'. The keys read here are 'version', 'part',
    'language', 'extension', 'type', 'path' and 'time'.

    Returns a list with one dict per version:
        files             the version's files, sorted by path, each a copy of
                          the input dict with an added 'isMainFile' flag
        isFullVersion     True if there is exactly one video file per part
        isMainVersion     True for the one version selected as main version
        subtitleLanguage  language of the selected 'srt' files, or None
        version           the version key, 'version/single|multi-part/extension'
    '''
    def get_file_key(file):
        return '\n'.join([
            file['version'] or '',
            file['part'] or '',
            file['language'] or '',
            file['extension'] or ''
        ])

    def get_version_key(file, extension=True):
        return '%s/%s-part/%s' % (
            file['version'] or '',
            'single' if file['part'] is None else 'multi',
            file['extension'] if extension else ''
        )

    def get_language_rank(language):
        # sort key: no language first, then the languages listed in
        # LANGUAGES in their order of preference, then any other language
        # alphabetically (tuples keep the comparison valid on Python 3)
        if language is None:
            return (0, 0, '')
        if language in LANGUAGES:
            return (1, LANGUAGES.index(language), '')
        return (2, 0, language)

    # filter out duplicate files (keep shortest path, sorted alphabetically)
    # since same version+part+language+extension can still differ in part title,
    # ''/'en' or 'mpg'/'mpeg', or have an unparsed section in their path
    files_by_key = {}
    file_keys = []
    for file in files:
        file_key = get_file_key(file)
        if file_key not in files_by_key:
            files_by_key[file_key] = []
            file_keys.append(file_key)
        files_by_key[file_key].append(file)
    unique_files = []
    duplicate_files = []
    # every key is handled exactly once, so a file that shares its key with
    # another file is not listed repeatedly
    for file_key in file_keys:
        key_files = sorted(
            files_by_key[file_key],
            key=lambda x: (len(x['path']), x['path'])
        )
        unique_files.append(key_files[0])
        duplicate_files += key_files[1:]

    # determine versions ('version/single|multi-part/videoextension')
    version_files = {}
    newest = {}
    for file in unique_files:
        if file['type'] != 'video':
            continue
        version_key = get_version_key(file)
        version_files.setdefault(version_key, []).append(file)
        if version_key in newest:
            newest[version_key] = max(newest[version_key], file['time'])
        else:
            newest[version_key] = file['time']

    # determine preferred video extension (newest)
    extension = {}
    prefixes = set(
        '/'.join(version_key.split('/')[:-1]) + '/' for version_key in version_files
    )
    for prefix in prefixes:
        extensions = set(
            version_key.split('/')[-1]
            for version_key in version_files if version_key.startswith(prefix)
        )
        extension[prefix] = sorted(extensions, key=lambda x: newest[prefix + x])[-1]

    # associate other (non-video) files
    for file in unique_files:
        if file['type'] == 'video':
            continue
        key = get_version_key(file, extension=False)
        if key in extension:
            version_files[key + extension[key]].append(file)
        else:
            # no video for this version: the files form a version of their
            # own, without extension
            version_files[key] = version_files.get(key, []) + [file]
            extension[key] = ''

    # determine main files (video + srt)
    full = {}
    language = {}
    main_files = {}
    for version_key in version_files:
        # only the number of distinct parts matters, so they are not sorted
        # (None and strings cannot be compared on Python 3)
        parts = set(file['part'] for file in version_files[version_key])
        # determine if all parts have one video file
        video_files = [file for file in version_files[version_key] if file['type'] == 'video']
        full[version_key] = len(video_files) == len(parts)
        main_files[version_key] = video_files if full[version_key] else []
        # determine preferred subtitle language
        language[version_key] = None
        subtitle_files = [file for file in version_files[version_key] if file['extension'] == 'srt']
        subtitle_languages = sorted(
            set(file['language'] for file in subtitle_files),
            key=get_language_rank
        )
        for subtitle_language in subtitle_languages:
            language_files = [file for file in subtitle_files if file['language'] == subtitle_language]
            # the language qualifies if it has a subtitle file for every
            # part (the files of other languages must not be counted)
            if len(language_files) == len(parts):
                language[version_key] = subtitle_language
                main_files[version_key] += language_files
                break

    # determine main version (best srt language, then video time)
    main_version = None
    full_version_keys = sorted(
        [version_key for version_key in version_files if full[version_key]],
        key=lambda x: newest[x],
        reverse=True
    )
    if full_version_keys:
        language_version_keys = sorted(
            [version_key for version_key in full_version_keys if language[version_key]],
            key=lambda x: get_language_rank(language[x])
        )
        main_version = language_version_keys[0] if language_version_keys else full_version_keys[0]

    # add duplicate files
    for file in duplicate_files:
        key = get_version_key(file, extension=False)
        version_key = '%s%s' % (key, extension[key] if key in extension else '')
        version_files[version_key] = version_files.get(version_key, []) + [file]

    # return data
    data = []
    for version_key in version_files:
        data.append({
            'files': sorted(
                [dict(file, isMainFile=file in main_files[version_key]) for file in version_files[version_key]],
                key=lambda x: x['path']
            ),
            'isFullVersion': full[version_key],
            'isMainVersion': version_key == main_version,
            'subtitleLanguage': language[version_key] if version_key in language else None,
            'version': version_key
        })
    return data
2012-09-11 15:55:35 +00:00
def parse_path(path, directory_key='director'):
    '''
    Parse a relative file path that follows the naming scheme
    '[X/][Directors/]Title (Year)/[Subdirectory/]File'.

    Returns a dict with the keys 'subdirectory', 'directorSort', 'director',
    'title', 'year', 'season', 'episode', 'episodes', 'episodeTitle',
    'isEpisode', 'seriesTitle', 'seriesYear', 'version', 'part', 'partTitle',
    'extension', 'type', 'language' and 'normalizedPath'. Components that are
    not present in the path are None (or an empty list).

    directory_key is accepted for interface compatibility but is currently
    not used.

    # all keys (a language is only kept for subtitle files)
    >>> parse_path('Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.en.fr.MPEG')['normalizedPath']
    'Frost, Mark; Lynch, David/Twin Peaks (1991)/Twin Peaks (S01E01) Pilot.European Version.Part 1.Welcome to Twin Peaks.mpg'

    # pop directory title off file name
    >>> parse_path("Unknown Director/www.xxx.com.._/www.xxx.com....Director's Cut.avi")['version']
    "Director's Cut"

    # handle dots
    >>> parse_path("Unknown Director/Unknown Title (2000)/... Mr. .com....Director's Cut.srt")['version']
    "Director's Cut"

    # multiple years, season zero, multiple episodes, dots in episode title and part title
    >>> parse_path('Groening, Matt/The Simpsons (1989-2012)/The Simpsons (S00E01-02) D.I.Y..Uncensored Version.Part 1.D.I.Y..de.avi')['normalizedPath']
    'Groening, Matt/The Simpsons (1989-2012)/The Simpsons (S00E01+02) D.I.Y..Uncensored Version.Part 1.D.I.Y..avi'

    # handle underscores
    >>> parse_path('Unknown Director/_com_ 1_0 _ NaN.._/_com_ 1_0 _ NaN....avi')['title']
    '.com: 1/0 / NaN...'

    # TODO: '.com.avi'
    '''
    def parse_type(string):
        # media type ('audio', 'subtitle', 'video') of an extension, or None
        for type_ in EXTENSIONS:
            if string in EXTENSIONS[type_]:
                return type_
        return None

    def parse_underscores(string):
        # '^_' or '_$' is '.'
        string = re.sub(r'^_', '.', string)
        string = re.sub(r'_$', '.', string)
        # '_.foo$' or '_ (' is '?'
        string = re.sub(r'_(?=(\.\w+$| \())', '?', string)
        # ' _..._ ' is '<...>'
        string = re.sub(r'(?<= )_(.+)_(?= )', r'<\g<1>>', string)
        # 'foo_bar' or 'foo _ bar' is '/'
        string = re.sub(r'(?<=\w)_(?=\w)', '/', string)
        string = re.sub(r' _ ', ' / ', string)
        # 'foo_ ' is ':'
        string = re.sub(r'(?<=\w)_ ', ': ', string)
        return string

    data = {}
    # a real list (not a map object), since it is measured and sliced below
    parts = [parse_underscores(part.strip()) for part in path.split('/')]
    # subdirectory
    if len(parts) > 4:
        data['subdirectory'] = '/'.join(parts[3:-1])
        parts = parts[:3] + parts[-1:]
    else:
        data['subdirectory'] = None
    length = len(parts)
    director = parts[-3] if length > 2 else None
    title = parts[-2] if length > 1 else None
    filename = parts[-1]
    # directorSort, director
    # (two separate lists, so that changing one does not change the other)
    data['directorSort'] = []
    data['director'] = []
    if director:
        data['directorSort'] = [
            name for name in director.split('; ') if name != 'Unknown Director'
        ]
        # 'Lastname, Firstname' -> 'Firstname Lastname'
        data['director'] = [
            ' '.join(reversed(name.split(', '))) for name in data['directorSort']
        ]
    # title, year
    data['title'] = data['year'] = None
    if title:
        match = re.search(r' \(\d{4}(-(\d{4})?)?\)$', title)
        data['title'] = title[:-len(match.group(0))] if match else title
        # strip ' (' and ')'
        data['year'] = match.group(0)[2:-1] if match else None
        file_title = re.sub(r'[/:]', '_', data['title'])
        # (remove title from beginning of filename if the rest contains a dot)
        filename = re.sub('^' + re.escape(file_title) + r'(?=.*\.)', '', filename)
    # (split by nospace+dot+word, but remove spaces preceding extension;
    # the dot is escaped so that only whitespace directly in front of the
    # final '.ext' is removed)
    parts = re.split(r'(?<!\s)\.(?=\w)', re.sub(r'\s+(?=\.\w+$)', '', filename))
    title = parts[0]
    extension = parts[-1] if len(parts) > 1 else None
    parts = parts[1:-1]
    if not data['title'] and title:
        data['title'] = title
    # season, episode, episodes, episodeTitle
    data['season'] = data['episode'] = data['episodeTitle'] = None
    data['episodes'] = []
    match = re.search(r' \((S\d{2})?(E\d{2}([+-]\d{2})?)?\)(.+)?', title)
    if match:
        if match.group(1):
            data['season'] = int(match.group(1)[1:])
        if match.group(2):
            if len(match.group(2)) == 3:
                # 'EYY'
                data['episode'] = int(match.group(2)[1:])
            else:
                # 'EYY+ZZ' or 'EYY-ZZ'
                data['episodes'] = list(range(
                    int(match.group(2)[1:3]), int(match.group(2)[-2:]) + 1
                ))
        if match.group(4):
            data['episodeTitle'] = match.group(4)[1:]
            # re-attach parts of an episode title that contains dots
            while data['episodeTitle'] and len(parts) \
                    and re.search(r'^\w+\.*$', parts[0]) \
                    and not re.search(r'^[a-z]{2}$', parts[0]):
                data['episodeTitle'] += '.%s' % parts.pop(0)
    # isEpisode, seriesTitle, seriesYear
    data['isEpisode'] = False
    data['seriesTitle'] = data['seriesYear'] = None
    if data['season'] is not None or data['episode'] is not None or data['episodes']:
        data['isEpisode'] = True
        data['seriesTitle'] = data['title']
        season = 'S%02d' % data['season'] if data['season'] is not None else ''
        episode = ''
        if data['episode'] is not None:
            episode = 'E%02d' % data['episode']
        elif data['episodes']:
            episode = 'E%02d%s%02d' % (
                data['episodes'][0],
                '+' if len(data['episodes']) == 2 else '-',
                data['episodes'][-1]
            )
        episode_title = ' %s' % data['episodeTitle'] if data['episodeTitle'] else ''
        data['title'] += ' (%s%s)%s' % (season, episode, episode_title)
        # for episodes, the year of the directory is the year of the series
        data['seriesYear'] = data['year']
        data['year'] = None
    # version
    data['version'] = parts.pop(0) if len(parts) \
        and re.search(r'^[A-Z0-9]', parts[0]) \
        and not re.search(r'^Part .', parts[0]) else None
    # part (the text after 'Part ')
    data['part'] = parts.pop(0)[5:] if len(parts) \
        and re.search(r'^Part .', parts[0]) else None
    # partTitle
    data['partTitle'] = parts.pop(0) if len(parts) \
        and re.search(r'^[A-Z0-9]', parts[0]) and data['part'] else None
    # re-attach parts of a part title that contains dots
    while data['partTitle'] and len(parts) and not re.search(r'^[a-z]{2}$', parts[0]):
        data['partTitle'] += '.%s' % parts.pop(0)
    # language
    language = parts.pop(0) if len(parts) and re.search(r'^[a-z]{2}$', parts[0]) else None
    # extension
    data['extension'] = re.sub(r'^mpeg$', 'mpg', extension.lower()) if extension else None
    # type
    data['type'] = parse_type(data['extension'])
    # language (only subtitles have one, the default is LANGUAGES[0])
    data['language'] = (language or LANGUAGES[0]) if data['type'] == 'subtitle' else None
    # normalizedPath
    data['normalizedPath'] = format_path(data)
    return data
2011-10-13 10:21:16 +00:00
def parse_movie_path(path):
    """
    Parse a path that follows the older naming scheme, like

        "A/Abrams, J.J.; Lieber, Jeffrey; Lindelof, Damon/Lost (2004)/Lost.Season 3.Episode 21.Greatest Hits.avi"
        "B/Balada, Ivan/Metrum (1967)/Metrum.Part 1.en.srt"
        "N/Nakata, Hideo/L - Change the World (2008)/L - Change the World.Part 2.srt"
        "R/Reitz, Edgar/Heimat (1984-2006)/Heimat.Season 2.Episode 8.The Wedding.Part 2.avi"
        "F/Feuillade, Louis/Les vampires (1915)/Les vampires.Episode 10.Part 2.avi"
            title: 'Les vampires', year: '1915', episode: 10, part: 2
        "G/Godard, Jean-Luc/Histoire(s) du cinema_ Toutes les histoires (1988)/Histoire(s) du cinema_ Toutes les histoires.avi"
        "G/Godard, Jean-Luc/Six fois deux (1976)/Six fois deux.Part 1A.Y a personne.avi"
        "G/Godard, Jean-Luc; Miéville, Anne-Marie/France_tour_detour_deux_enfants (1977)/France_tour_detour_deux_enfants.Part 5.Impression_Dictée.avi"
        "L/Labarthe, André S_/Cinéastes de notre temps (1964-)/Cinéastes de notre temps.Episode.Jean Renoir le patron, première partie_ La Recherche du relatif.avi"
        "S/Scott, Ridley/Blade Runner (1982)/Blade Runner.Director's Cut.avi"

    or

        T/Title (Year)/Title.avi

    Returns a dict with the keys 'director', 'episodeDirector', 'episode',
    'episodeTitle', 'episodeYear', 'extension', 'language', 'part', 'season',
    'seriesTitle', 'title' and 'year'. 'episodeDirector' is always an empty
    list and 'episodeYear' is always None; 'part' is 0 if the file name has
    no numeric '.Part N.' component.
    """
    episodeTitle = episodeYear = seriesTitle = None
    episodeDirector = []
    parts = path.split('/')

    #title/year
    if len(parts) == 4:
        title = parts[2]
    elif len(parts) > 1:
        title = parts[1]
    else:
        title = parts[0]
    # '_ ' stands for ': ', a leading or trailing '_' stands for '.'
    title = title.replace('_ ', ': ')
    if title.endswith('_'):
        title = title[:-1] + '.'
    if title.startswith('_'):
        title = '.' + title[1:]

    year = find_re(title, r'(\(\d{4}\))')
    if not year:
        year = find_re(title, r'(\(\d{4}-\d*\))')
    if year and title.endswith(year):
        title = title[:-len(year)].strip()
        year = year[1:-1]
        if '-' in year:
            # for a range of years, keep the first year
            year = find_re(year, r'\d{4}')

    #director
    if len(parts) == 4:
        director = parts[1]
        if director.endswith('_'):
            director = "%s." % director[:-1]
        # a real list, also on Python 3 (where filter() would be lazy)
        director = [
            name for name in (normalize_name(d).strip() for d in director.split('; '))
            if name not in ('Unknown Director', 'Various Directors')
        ]
    else:
        director = []

    #extension/language
    # ('. ' is protected, so that the file name is only split at dots that
    # are not followed by a space)
    fileparts = [x.replace('||', '. ') for x in parts[-1].replace('. ', '||').split('.')]
    extension = (fileparts[-1] if len(fileparts) > 1 else '') or ''

    if len(fileparts) > 1 and len(fileparts[-2]) == 2:
        language = fileparts[-2]
    else:
        language = ''

    def get_following_part(label):
        # the file part after the given one, or None if it is the last part
        index = fileparts.index(label) + 1
        return fileparts[index] if index < len(fileparts) else None

    def is_episode_title(string):
        return not (
            string is None or string == extension or string.startswith('Part')
        )

    #season/episode/episodeTitle
    match = re.compile(r'(.+?) \((S(\d+))?(E(\d+))?\)( (.+?))?\.').match(parts[-1])
    if match:
        seriesTitle = match.group(1)
        season = match.group(3)
        episode = match.group(5)
        episodeTitle = (match.group(6) or '').strip()
        if episode is not None:
            episode = int(episode)
        if season is not None:
            season = int(season)
        if episode and not season:
            season = 1
    else:
        season = find_re(parts[-1], r'\.Season (\d+)\.')
        season = int(season) if season else None

        episode = find_re(parts[-1], r'\.Episode[s]* ([\d+]+)\.')
        # for 'Episodes 1+2', keep the first episode
        episode = int(episode.split('+')[0]) if episode else None

        if episode and 'Episode %d' % episode in fileparts:
            episodeTitle = get_following_part('Episode %d' % episode)
            if not is_episode_title(episodeTitle):
                episodeTitle = None

        if not season and 'Episode' in fileparts:
            # a bare 'Episode' part, followed by the episode title
            episodeTitle = get_following_part('Episode')
            if not is_episode_title(episodeTitle):
                episodeTitle = None
            else:
                season = 1

    if season:
        seriesTitle = title
        title = u'%s (S%02d)' % (seriesTitle, season)
        if isinstance(episode, int):
            title = u'%s (S%02dE%02d)' % (seriesTitle, season, episode)
        if episodeTitle:
            title = u'%s %s' % (title, episodeTitle)

    #part
    part = find_re(parts[-1], r'\.Part (\d+)\.')
    part = int(part) if part else 0

    return {
        'director': director,
        'episodeDirector': episodeDirector,
        'episode': episode,
        'episodeTitle': episodeTitle,
        'episodeYear': episodeYear,
        'extension': extension,
        'language': language,
        'part': part,
        'season': season,
        'seriesTitle': seriesTitle,
        'title': title,
        'year': year,
    }
def create_movie_path(title, director, year,
                      season, episode, episodeTitle, episodeDirector, episodeYear,
                      part, language, extension):
    '''
    Build 'D/Directors/Title (Year)/Title.Season N.Episode N.Episode Title.Part N.ext'.

    Arguments:
        title: '', director: [''], year: '',
        season: int, episode: int, episodeTitle: '', episodeDirector: [''], episodeYear: '',
        part: int, language: '', extension: ''

    director is a list of names; each name is converted with get_sort_name
    and the results are joined with '; '. If no director is given, the
    directory is named 'Unknown Director'. Components of the file name that
    are empty, None or 0 are left out.

    episodeDirector, episodeYear and language are accepted for interface
    compatibility but are not used to build the path.

    Returns the path, joined with os.path.join.
    '''
    directory = '; '.join([get_sort_name(name) for name in director])
    if not directory:
        # without this fallback, taking the first letter below would fail
        directory = 'Unknown Director'
    filename = [title]
    if season:
        filename.append('Season %d' % season)
    if episode:
        filename.append('Episode %d' % episode)
    if episodeTitle:
        filename.append(episodeTitle)
    if part:
        filename.append('Part %s' % part)
    if extension:
        filename.append(extension)
    # the top-level directory is the first letter of the directors' directory
    return os.path.join(
        directory[0], directory, '%s (%s)' % (title, year), '.'.join(filename)
    )
2011-10-13 13:08:40 +00:00
def get_oxid(title, director=(), year='',
             season='', episode='', episode_title='', episode_director=(), episode_year=''):
    '''
    Return an id of the form '0x' + 16 upper case hex digits, derived from
    SHA-1 hashes of the given metadata.

    director and episode_director are sequences of names (the defaults are
    immutable empty tuples); year, season, episode and episode_year are
    converted with str().

    Without episode and episode_title, the first eight digits are derived
    from the directors and the last eight from title and year. Otherwise,
    the first eight digits are derived from directors, title, year and
    season, and the last eight from episode, episode directors, episode
    title and episode year.
    '''
    def get_hash(string):
        return hashlib.sha1(string.encode('utf-8')).hexdigest().upper()

    def get_half(values):
        # first eight hex digits of the hash of the newline-separated values
        return get_hash('\n'.join(values))[:8]

    director = ', '.join(director)
    episode_director = ', '.join(episode_director)
    if not episode and not episode_title:
        # movie (or series): hash the directors and title/year separately
        oxid = get_hash(director)[:8] + get_half([title, str(year)])
    else:
        # episode: series data in the first half, episode data in the second
        oxid = get_half([director, title, str(year), str(season)]) + \
            get_half([str(episode), episode_director, episode_title, str(episode_year)])
    return u'0x' + oxid