parse_paths -> parse_item_files (improved, untested)
This commit is contained in:
parent
0979f1edda
commit
3e232b9a1e
1 changed files with 107 additions and 80 deletions
187
ox/movie.py
187
ox/movie.py
|
@ -49,7 +49,7 @@ def format_path(data, has_director_directory=True):
|
|||
'.%s' % data['version'] if data['version'] else '',
|
||||
'.Part %s' % data['part'] if data['part'] else '',
|
||||
'.%s' % data['partTitle'] if data['partTitle'] else '',
|
||||
'.%s' % language.replace('/', '.') if language != None else '',
|
||||
'.%s' % data['language'] if data['language'] else '',
|
||||
'.%s' % data['extension'] if data['extension'] else ''
|
||||
)
|
||||
]))
|
||||
|
@ -58,6 +58,111 @@ def format_path(data, has_director_directory=True):
|
|||
return '/'.join(parts)
|
||||
|
||||
|
||||
|
||||
def parse_item_files(files):
    """Group the files of one item into versions and pick the main files.

    Args:
        files: list of file dicts as returned by parse_path, each extended
            with 'originalPath' and 'size'. The keys read here are
            'version', 'part', 'language', 'extension', 'type',
            'originalPath' and 'size'.

    Returns:
        A dict mapping a version key of the form
        'version.single|multi-part.videoextension' to a dict with:
            'files': copies of the version's file dicts, each with an added
                'isMainFile' flag, sorted by 'originalPath'
            'isFullVersion': True if the version has exactly one video file
                per part
            'isMainVersion': True for the one preferred full version
            'subtitleLanguage': preferred subtitle language that covers all
                parts, or None
            'videoSize': summed size of the version's unique video files,
                or None if the version has no video
        The input dicts are not modified.
    """
    def get_file_key(file):
        # a tuple (rather than a joined string) tolerates None values
        return (
            file['version'], file['part'], file['language'], file['extension']
        )

    def get_version_key(file, extension=True):
        # files without a version get an empty version segment
        return '%s.%s-part.%s' % (
            file['version'] or '',
            'single' if file['part'] is None else 'multi',
            (file['extension'] or '') if extension else ''
        )

    def get_language_rank(language):
        # sort order: no language, then known languages in LANGUAGES order,
        # then unknown languages alphabetically; tuples keep the key
        # comparable (a bare int/str/None mix is not orderable)
        if language is None:
            return (0, 0, '')
        if language in LANGUAGES:
            return (1, LANGUAGES.index(language), '')
        return (2, 0, language)

    # filter out duplicate files (keep shortest original path, sorted
    # alphabetically) since same version+part+language+extension can still
    # differ in part title, ''/'en' or 'mpg'/'mpeg', or have an unparsed
    # section in their original path
    files_by_key = {}
    file_keys = []
    for file in files:
        file_key = get_file_key(file)
        if file_key not in files_by_key:
            files_by_key[file_key] = []
            file_keys.append(file_key)
        files_by_key[file_key].append(file)
    unique_files = []
    duplicate_files = []
    for file_key in file_keys:
        key_files = sorted(
            files_by_key[file_key],
            key=lambda x: (len(x['originalPath']), x['originalPath'])
        )
        unique_files.append(key_files[0])
        duplicate_files += key_files[1:]

    # determine versions ('version.single|multi-part.videoextension')
    version_files = {}
    size = {}
    for file in unique_files:
        if file['type'] == 'video':
            version_key = get_version_key(file)
            version_files.setdefault(version_key, []).append(file)
            size[version_key] = size.get(version_key, 0) + file['size']

    # determine preferred video extension (largest size, ties broken by
    # extension name so that the result is deterministic)
    extension = {}
    for version_key in version_files:
        key, video_extension = version_key.rsplit('.', 1)
        key += '.'
        best = extension.get(key)
        if best is None or (
            (size[version_key], video_extension) > (size[key + best], best)
        ):
            extension[key] = video_extension

    def get_target_key(file):
        # video files belong to the version named after their own extension,
        # all other files to the version with the preferred video extension
        # (or to an extension-less version if there is no video at all)
        if file['type'] == 'video':
            return get_version_key(file)
        key = get_version_key(file, extension=False)
        return key + extension.setdefault(key, '')

    # associate other (non-video) files
    for file in unique_files:
        if file['type'] != 'video':
            version_files.setdefault(get_target_key(file), []).append(file)

    # determine main_files (video + subtitles)
    full = {}
    language = {}
    main_files = {}
    for version_key, item_files in version_files.items():
        parts = set(file['part'] for file in item_files)
        # determine if all parts have video
        video_files = [file for file in item_files if file['type'] == 'video']
        full[version_key] = len(video_files) == len(parts)
        main_files[version_key] = list(video_files) if full[version_key] else []
        # determine preferred subtitle language
        language[version_key] = None
        subtitle_files = [
            file for file in item_files if file['extension'] == 'srt'
        ]
        for subtitle_language in sorted(
            set(file['language'] for file in subtitle_files),
            key=get_language_rank
        ):
            language_files = [
                file for file in subtitle_files
                if file['language'] == subtitle_language
            ]
            # only a language that has one subtitle per part qualifies
            if len(language_files) == len(parts):
                language[version_key] = subtitle_language
                main_files[version_key] += language_files
                break

    # determine main version (best subtitle language, then video size);
    # largest video first, and since sorted() is stable, versions with the
    # same subtitle language stay ordered by size
    main_version = None
    full_version_keys = sorted(
        [version_key for version_key in version_files if full[version_key]],
        key=lambda x: (-size[x], x)
    )
    if full_version_keys:
        language_version_keys = sorted(
            [
                version_key for version_key in full_version_keys
                if language[version_key]
            ],
            key=lambda x: get_language_rank(language[x])
        )
        main_version = (
            language_version_keys[0] if language_version_keys
            else full_version_keys[0]
        )

    # add duplicate files (after the main file selection, so that they
    # neither count as parts nor become main files)
    for file in duplicate_files:
        version_files.setdefault(get_target_key(file), []).append(file)

    # return data
    data = {}
    for version_key in version_files:
        # compare by identity: a duplicate may be equal to a main file
        main_ids = set(id(file) for file in main_files.get(version_key, []))
        data[version_key] = {
            'files': sorted(
                [
                    dict(file, isMainFile=id(file) in main_ids)
                    for file in version_files[version_key]
                ],
                key=lambda x: x['originalPath']
            ),
            'isFullVersion': full.get(version_key, False),
            'isMainVersion': version_key == main_version,
            'subtitleLanguage': language.get(version_key),
            'videoSize': size.get(version_key)
        }
    return data
|
||||
|
||||
|
||||
def parse_path(path):
|
||||
'''
|
||||
# all keys
|
||||
|
@ -184,11 +289,7 @@ def parse_path(path):
|
|||
while data['partTitle'] and len(parts) and not re.search('^[a-z]{2}$', parts[0]):
|
||||
data['partTitle'] += '.%s' % parts.pop(0)
|
||||
# language
|
||||
data['language'] = None
|
||||
while len(parts) and re.search('^[a-z]{2}$', parts[0]):
|
||||
data['language'] = parts.pop(0) if not data['language'] else '%s/%s' % (
|
||||
data['language'], parts.pop(0)
|
||||
)
|
||||
data['language'] = parts.pop(0) if len(parts) and re.search('^[a-z]{2}$', parts[0]) else None
|
||||
# extension
|
||||
data['extension'] = re.sub('^mpeg$', 'mpg', extension.lower()) if extension else None
|
||||
# type
|
||||
|
@ -200,80 +301,6 @@ def parse_path(path):
|
|||
return data
|
||||
|
||||
|
||||
def parse_paths(paths):
    """Parse a list of paths and group the resulting files into versions.

    Every path is run through parse_path. The files are grouped by their
    'version'; a version that contains both files without a part and files
    with a part is split into a '' group and a '...[multi-part]' group.

    Returns a dict that maps each group name to a dict with the keys
    'isMainVersion' (bool), 'files' (one list per part, holding
    {'isMainFile': ..., 'path': ...} dicts), 'videoExtensions' and
    'subtitleLanguages'.
    """
    parsed = [dict(parse_path(path), originalPath=path) for path in sorted(paths)]

    # group the parsed files by version, splitting mixed single/multi-part
    grouped = {}
    for version in sorted(set(item['version'] for item in parsed)):
        members = [item for item in parsed if item['version'] == version]
        part_ids = sorted(set(item['part'] for item in members))
        if part_ids[0] is None and len(part_ids) > 1:
            grouped[''] = [item for item in members if item['part'] is None]
            multi_name = '%s[multi-part]' % (' ' + version if version else '')
            grouped[multi_name] = [
                item for item in members if item['part'] is not None
            ]
        else:
            grouped[version or ''] = members

    def language_order(code):
        # languages listed in LANGUAGES sort by their position there
        return LANGUAGES.index(code) if code in LANGUAGES else code

    data = {}
    names = sorted(grouped.keys())
    for name in names:
        members = grouped[name]
        # FIXME: make video_extensions and subtitle_languages local variables
        video_extensions = []
        subtitle_languages = []
        part_lists = []
        data[name] = {
            'isMainVersion': False,
            'files': part_lists,
            'videoExtensions': video_extensions,
            'subtitleLanguages': subtitle_languages
        }
        part_ids = sorted(set(item['part'] for item in members))
        part_count = len(part_ids)

        # videoExtensions: extensions with at least one file per part
        candidates = set(
            item['extension'] for item in members if item['type'] == 'video'
        )
        for extension in sorted(candidates):
            matches = [item for item in members if item['extension'] == extension]
            if len(matches) >= part_count:
                video_extensions.append(extension)

        # subtitleLanguages: srt languages with at least one file per part
        subtitles_all = [item for item in members if item['extension'] == 'srt']
        for language in sorted(
            set(item['language'] for item in subtitles_all), key=language_order
        ):
            matches = [
                item for item in subtitles_all if item['language'] == language
            ]
            if len(matches) >= part_count:
                subtitle_languages.append(language)

        # files: per part, flag the first video and the first subtitle
        for part in part_ids:
            in_part = [item for item in members if item['part'] == part]
            main_paths = []
            if video_extensions:
                videos = [
                    item for item in in_part
                    if item['extension'] == video_extensions[0]
                ]
                if videos:
                    main_paths.append(videos[0]['originalPath'])
            if subtitle_languages:
                subtitles = [
                    item for item in in_part
                    if item['extension'] == 'srt'
                    and item['language'] == subtitle_languages[0]
                ]
                if subtitles:
                    main_paths.append(subtitles[0]['originalPath'])
            part_lists.append([
                {
                    'isMainFile': item['originalPath'] in main_paths,
                    'path': item['originalPath']
                }
                for item in in_part
            ])

    # isMainVersion: prefer versions that have video and subtitles, ordered
    # by subtitle language; otherwise fall back to versions with video only
    ranked = sorted(
        [
            name for name in names
            if data[name]['videoExtensions'] and data[name]['subtitleLanguages']
        ],
        key=lambda x: language_order(data[x]['subtitleLanguages'][0])
    )
    if not ranked:
        ranked = sorted(
            [name for name in names if data[name]['videoExtensions']],
            key=lambda x: data[x]['videoExtensions'][0]
        )
    if ranked:
        data[ranked[0]]['isMainVersion'] = True
    return data
|
||||
|
||||
|
||||
def parse_movie_path(path):
|
||||
"""
|
||||
"A/Abrams, J.J.; Lieber, Jeffrey; Lindelof, Damon/Lost (2004)/Lost.Season 3.Episode 21.Greatest Hits.avi"
|
||||
|
|
Loading…
Reference in a new issue