581 lines
23 KiB
Python
Executable file
581 lines
23 KiB
Python
Executable file
#!/usr/bin/python
|
|
# -*- coding: utf-8 -*-
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
# GPL 2012
|
|
from __future__ import division, with_statement
|
|
import datetime
|
|
import json
|
|
from optparse import OptionParser
|
|
import os
|
|
import ox
|
|
import re
|
|
import shutil
|
|
import sys
|
|
import unicodedata
|
|
|
|
|
|
# Names of the files this client reads and writes in the current directory.
# The 'files' entry is a template: '%s' is filled in with the volume name.
FILES = dict(
    config='pandoraclient.config.jsonc',
    errors='pandoraclient.errors.json',
    files='pandoraclient.files.%s.json',
    organize='pandoraclient.organize.jsonc',
    sync='pandoraclient.sync.jsonc'
)
|
|
|
|
|
|
def copy():
    """Placeholder for the 'copy' action; currently does nothing."""
    return None
|
|
|
|
|
|
def execute_copy():
    """Placeholder for executing the 'copy' action; currently does nothing."""
    return None
|
|
|
|
|
|
def organize():
    """Scan the first configured volume and write the files/organize reports.

    Reads the configuration with get_config(), walks the volume, records
    path, size, modification time and hash for every non-system file that is
    at most four path components deep (hashes are reused from an existing
    files report when path, size and time are unchanged), asks the API for
    the path it knows for each hash, and sorts the findings into issue lists.

    Two files are written to the current directory:
    FILES['files'], a JSON inventory of the scanned files, and
    FILES['organize'], a JSONC list of issues grouped into 'automatic' and
    'manual' sections, to be edited and then applied with
    ``pandoraclient organize -x``.

    Side effect: FILES['files'] is replaced by the volume-specific name.
    """

    def add_directory(path):
        # Prefix path with the top-level directory that precedes the first
        # directory sorting after it; defaults to the last directory.
        d = directories[-1]
        sort_key = ox.sort_string(path).lower()
        for i, directory in enumerate(directories):
            if sort_key < ox.sort_string(directory).lower():
                d = directories[max(i - 1, 0)]
                break
        return '%s/%s' % (d, path)

    def remove_directory(path):
        # Drop the first path component.
        return '/'.join(path.split('/')[1:])

    def is_system_file(filename):
        # Matches names starting with '._' and the exact name '.DS_Store'.
        return re.search(r'^\.(_|DS_Store$)', filename) is not None

    def cache_key(entry):
        # A cached hash is only valid for the same path, size and mtime.
        return '%s %s %s' % (entry['path'], str(entry['size']), entry['time'])

    config = get_config()
    # Only the first volume (in the config's iteration order) is handled.
    volume = list(config['volumes'])[0]
    volume_path = config['volumes'][volume]
    if isinstance(volume_path, bytes):
        # Python 2 byte string: work with unicode paths throughout.
        volume_path = volume_path.decode('utf-8')
    if volume_path[-1] != '/':
        volume_path += '/'
    api = ox.API(config['url'])
    if '%s' in FILES['files']:
        # Guarded so that a second call does not re-format the final name.
        FILES['files'] = FILES['files'] % volume

    # Hashes from a previous run of the same volume path.
    cache = {}
    if os.path.exists(FILES['files']):
        with open(FILES['files']) as f:
            data = json.load(f)
        if data['path'] == volume_path:
            for cached in data['files']:
                cache[cache_key(cached)] = cached['hash']

    files = []
    files_by_hash = {}
    files_by_item = {}
    issues = {
        'duplicate files': [],
        'empty directories': [],
        'incorrect filenames': [],
        'incorrect filenames (target exists)': [],
        'missing subtitles': [],
        'missing video': [],
        'multiple versions': [],
        'non-canonical filenames': [],
        'non-canonical filenames (target exists)': [],
        'system files': [],
        'unexpected directories': [],
        'unexpected files': [],
        'unknown extensions': []
    }
    previous_path = ''

    print('Scanning %s' % volume_path)
    directories = sorted(os.listdir(volume_path))
    for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
        absolute_path = unicodedata.normalize('NFD', absolute_path)
        relative_path = absolute_path[len(volume_path):]
        for filename in ox.sorted_strings(filenames):
            filename = unicodedata.normalize('NFD', filename)
            # Progress output: one line per three-component directory.
            if relative_path != previous_path and len(relative_path.split('/')) == 3:
                print(relative_path)
                previous_path = relative_path
            full_path = os.path.join(absolute_path, filename)
            path = os.path.join(relative_path, filename)
            if is_system_file(filename):
                issues['system files'].append(path)
                continue
            # unexpected files, unexpected directories
            parts = path.split('/')
            length = len(parts)
            if length < 4:
                issues['unexpected files'].append(path)
            if length <= 4:
                entry = {}
                entry['path'] = path
                entry['size'] = os.path.getsize(full_path)
                entry['time'] = datetime.datetime.utcfromtimestamp(
                    os.stat(full_path).st_mtime
                ).isoformat()
                key = cache_key(entry)
                entry['hash'] = cache[key] if key in cache else ox.oshash(full_path)
                files.append(entry)
                files_by_hash.setdefault(entry['hash'], []).append(entry)
            elif parts[3] not in ['Extras', 'Versions']:
                issues['unexpected directories'].append(path)
        # A directory counts as empty if it has no subdirectories and
        # nothing but system files (all() is True for no files at all).
        if not dirnames and all(is_system_file(name) for name in filenames):
            issues['empty directories'].append(relative_path)

    print('Scanning for duplicates')
    for same_hash in files_by_hash.values():
        if len(same_hash) > 1:
            issues['duplicate files'].append(sorted(entry['path'] for entry in same_hash))

    print('Scanning for inconsistencies')
    api_paths = api.getPath({'id': list(files_by_hash)})['data']
    for entry in files:
        # Depth of this file's own path. The original tested the variable
        # left over from the scan loop, i.e. the last scanned path.
        depth = len(entry['path'].split('/'))
        info = dict(entry)
        info.update(ox.movie.parse_path(remove_directory(entry['path'])))
        info['normalizedPath'] = add_directory(info['normalizedPath'])
        # unknown extensions
        if not info['type']:
            issues['unknown extensions'].append(info['path'])
        path_key = 'normalizedPath'
        if info['hash'] in api_paths and api_paths[info['hash']] != remove_directory(info['path']):
            # The API knows this hash under a different path.
            info['correctedPath'] = add_directory(api_paths[info['hash']])
            path_key = 'correctedPath'
        if depth == 4:
            info['item'] = '%s/%s' % ('/'.join(info[path_key].split('/')[:-1]), info['title'])
            files_by_item.setdefault(info['item'], []).append(info)

    for item in sorted(files_by_item):
        item_files = files_by_item[item]
        # missing video, missing subtitles, multiple versions
        versions = ox.movie.parse_item_files(item_files)
        main_files = sum([v['files'] for v in versions if v['isMainVersion']], [])
        other_files = sum([v['files'] for v in versions if not v['isMainVersion']], [])
        if not main_files:
            issues['missing video'].append([f['path'] for f in item_files])
        else:
            video_files = [f for f in main_files if f['type'] == 'video']
            subtitle_files = [f for f in main_files if f['type'] == 'subtitle']
            if subtitle_files and len(subtitle_files) < len(video_files):
                issues['missing subtitles'].append([f['path'] for f in main_files])
        if other_files:
            issues['multiple versions'].append([f['path'] for f in main_files + other_files])
        # incorrect filenames, non-canonical filenames
        for version in versions:
            # Which target path applies to each source path of this version.
            path_key = {}
            for f in version['files']:
                path_key[f['path']] = 'correctedPath' if 'correctedPath' in f else 'normalizedPath'
            rename = [
                [f['path'], f[path_key[f['path']]]]
                for f in version['files'] if f['path'] != f[path_key[f['path']]]
            ]
            for extension in ['idx', 'srt', 'sub']:
                languages = set(
                    f['language'] for f in version['files'] if f['extension'] == extension
                )
                if len(languages) == 1 and 'en' in languages:
                    # only english subtitles
                    regexp = r'\.en(?=\.%s$)' % extension
                    # don't add '.en'
                    rename = [pair for pair in rename if not re.search(regexp, pair[1])]
                    # remove '.en'
                    rename += [
                        [f['path'], re.sub(regexp, '', f['path'])]
                        for f in version['files'] if re.search(regexp, f['path'])
                    ]
            for pair in rename:
                issues['%s filenames' % (
                    'incorrect' if path_key[pair[0]] == 'correctedPath' else 'non-canonical'
                )].append(pair)

    print('Scanning for conflicts')
    # Paths considered occupied; a set so lookups are cheap and removing a
    # path twice (same source in two renames) cannot raise.
    existing_paths = set(entry['path'] for entry in files)
    for key in ['incorrect filenames', 'non-canonical filenames']:
        exists_key = '%s (target exists)' % key
        path_count = {}
        for pair in issues[key]:
            path_count[pair[1]] = path_count.get(pair[1], 0) + 1
        # Renames that only change case are not checked for conflicts.
        for pair in [p for p in issues[key] if p[0].lower() != p[1].lower()]:
            if path_count[pair[1]] > 1:
                # multiple files with the same target path
                issues[key].remove(pair)
                if pair not in issues[exists_key]:
                    issues[exists_key].append(pair)
            elif pair[1] not in existing_paths:
                # target path does not exist, remove original path from existing paths
                existing_paths.discard(pair[0])
        for pair in [p for p in issues[key] if p[0].lower() != p[1].lower()]:
            if pair[1] in existing_paths:
                # target path exists
                issues[key].remove(pair)
                if pair not in issues[exists_key]:
                    issues[exists_key].append(pair)

    # Paths are unicode under Python 2, so test against both string types;
    # otherwise flat path lists would be ordered by first character only.
    string_types = (type(''), type(u''))
    for issue in list(issues):
        if issues[issue]:
            if isinstance(issues[issue][0], string_types):
                issues[issue] = sorted(issues[issue])
            else:
                issues[issue] = sorted(issues[issue], key=lambda x: x[0])
    keys = {
        'automatic': {
            'remove': [
                'empty directories',
                'system files',
                'unexpected files',
                'unknown extensions'
            ],
            'rename': [
                'incorrect filenames',
                'non-canonical filenames'
            ]
        },
        'manual': {
            'rename': [
                'incorrect filenames (target exists)',
                'non-canonical filenames (target exists)'
            ],
            'resolve': [
                'duplicate files',
                'missing subtitles',
                'missing video',
                'multiple versions',
                'unexpected directories'
            ]
        }
    }

    print('Writing %s' % FILES['files'])
    # Written by hand so that every file entry occupies exactly one line.
    data = ['{']
    data.append(4 * ' ' + '"directories": ' + json.dumps(directories) + ',')
    data.append(4 * ' ' + '"files": [')
    for n, entry in enumerate(files):
        data.append(8 * ' ' + json.dumps({
            'hash': entry['hash'],
            'path': entry['path'],
            'size': entry['size'],
            'time': entry['time']
        }, sort_keys=True) + (',' if n < len(files) - 1 else ''))
    data.append(4 * ' ' + '],')
    data.append(4 * ' ' + '"path": ' + json.dumps(volume_path) + ',')
    data.append(4 * ' ' + '"totals": {"files": %d, "size": %d' % (
        len(files), sum([entry['size'] for entry in files])
    ) + '}')
    data.append('}')
    with open(FILES['files'], 'w') as f:
        f.write('\n'.join(data))

    print('Writing %s' % FILES['organize'])
    data = ['{']
    sections = sorted(keys)
    for s, section in enumerate(sections):
        data.append('')
        data.append(4 * ' ' + '"%s": {' % section)
        actions = sorted(keys[section])
        for a, action in enumerate(actions):
            data.append('')
            data.append(8 * ' ' + '"%s": [' % action)
            issue_keys = keys[section][action]
            for i, issue in enumerate(issue_keys):
                data.append('')
                data.append(12 * ' ' + '/* %s */' % issue)
                if issues[issue]:
                    # Strip the outer brackets: all issue lists of one
                    # action are merged into a single array.
                    for line in json.dumps(issues[issue], indent=4).split('\n')[1:-1]:
                        data.append(8 * ' ' + line)
                    # Separate from the next entries only if any follow;
                    # a comma before ']' would leave a trailing comma.
                    if any(issues[later] for later in issue_keys[i + 1:]):
                        data[-1] += ','
            data.append('')
            data.append(8 * ' ' + ']' + (',' if a < len(actions) - 1 else ''))
        data.append('')
        data.append(4 * ' ' + '}' + (',' if s < len(sections) - 1 else ''))
    data.append('}')
    with open(FILES['organize'], 'w') as f:
        f.write('\n'.join(data))
    print('Next, edit %s and run pandoraclient organize -x' % FILES['organize'])
|
|
|
|
|
|
def execute_organize():
    """Apply the 'automatic' section of the organize file to the volume.

    Reads FILES['organize'] (exits if it does not exist), removes every path
    listed under automatic/remove, and performs every [source, target] pair
    listed under automatic/rename.  Renames are done in two passes through a
    temporary '<target>.pandora' name.  Failures are collected and printed
    at the end instead of aborting the run.
    """

    def rename_file(source, target):
        # Move source to target, creating the target directory first.
        # Raises IOError if source is missing or target already exists.
        if not os.path.exists(source):
            raise IOError('Source does not exist')
        elif os.path.exists(target):
            raise IOError('Target exists')
        else:
            ox.makedirs(os.path.dirname(target))
            shutil.move(source, target)

    def rename_all(pairs):
        # Rename each (source, target) pair, recording failures.
        for source, target in pairs:
            print('Renaming "%s" to "%s"' % (source, target))
            try:
                rename_file(source, target)
            except EnvironmentError as error:
                # Covers IOError and OSError, including errors raised by
                # shutil.move, so one failure does not stop the others.
                errors.append('Could not rename "%s" to "%s" (%s)' % (source, target, error))

    config = get_config()
    # Only the first volume (in the config's iteration order) is handled.
    volume = list(config['volumes'])[0]
    volume_path = config['volumes'][volume]
    if isinstance(volume_path, bytes):
        # Same decoding as organize(): the paths read from the organize
        # file are unicode and are joined with this prefix.
        volume_path = volume_path.decode('utf-8')

    if not os.path.exists(FILES['organize']):
        sys.exit('%s not found' % FILES['organize'])

    with open(FILES['organize']) as f:
        data = ox.jsonc.load(f)
    remove = [os.path.join(volume_path, path) for path in data['automatic']['remove']]
    rename = [
        [os.path.join(volume_path, path) for path in paths]
        for paths in data['automatic']['rename']
    ]
    errors = []
    for path in remove:
        print('Removing "%s"' % path)
        try:
            if os.path.isdir(path):
                # rmdir only succeeds on empty directories.
                os.rmdir(path)
            else:
                os.remove(path)
        except (EnvironmentError, UnicodeError):
            errors.append('Could not remove "%s"' % path)
    # First pass: move every source to a temporary name next to its target.
    rename_all([(paths[0], paths[1] + '.pandora') for paths in rename])
    # Second pass: move the temporary names to the final targets.
    rename_all([(paths[1] + '.pandora', paths[1]) for paths in rename])
    for error in errors:
        print(error)
|
|
|
|
|
|
def sync():
    """Compare two files reports and write a sync plan to FILES['sync'].

    Expects ``sys.argv`` to be ``[program, 'sync', targetfiles.json]``.
    Side "a" is the local files report (FILES['files']), side "b" is the
    report named on the command line.  Files are grouped by the first three
    components of their path, and directories are compared by the hashes and
    paths of their files.  Exits if either side contains two files with the
    same hash.

    NOTE(review): this function looks unfinished.  Only 'same files,
    different filenames' is both collected and written.  'same file,
    different filename' and 'unique file' are collected but never written.
    'same directory, different files', 'same file, different directory' and
    'unique directory' are written but never collected -- the logic that
    used to fill them was disabled in SOURCE (kept in a string literal) and
    is omitted here.
    """

    def get_directories(files):
        # Group file entries by the first three components of their path
        # and attach newline-joined sorted hash and path lists.
        directories = {}
        for entry in files:
            # path = os.path.dirname(entry['path'])
            path = '/'.join(entry['path'].split('/')[:3])
            directories.setdefault(path, {'files': []})['files'].append(entry)
        for directory in directories.values():
            directory['hashes'] = '\n'.join(sorted(
                [entry['hash'] for entry in directory['files']]
            ))
            directory['paths'] = '\n'.join(sorted(
                [entry['path'] for entry in directory['files']]
            ))
        return directories

    def load_files(filename):
        # The reports are plain JSON, as written by organize().
        with open(filename) as f:
            return json.load(f)['files']

    if len(sys.argv) != 3:
        print('Usage: %s sync targetfiles.json' % sys.argv[0])
        sys.exit(1)

    print('Reading files')
    source_file = FILES['files']
    if '%s' in source_file:
        # organize() writes the report as FILES['files'] % volume, so the
        # template has to be filled in the same way before it can be read.
        source_file = source_file % list(get_config()['volumes'])[0]
    target_file = sys.argv[2]
    files = [load_files(filename) for filename in [source_file, target_file]]
    directories = [get_directories(side) for side in files]
    file_path_by_hash = [{}, {}]
    hashes = [{}, {}]
    paths = [{}, {}]
    for i in [0, 1]:
        for entry in files[i]:
            if entry['hash'] in file_path_by_hash[i]:
                print('Duplicate file:')
                print(file_path_by_hash[i][entry['hash']])
                print(entry['path'])
                sys.exit()
            file_path_by_hash[i][entry['hash']] = entry['path']
        for path in sorted(directories[i]):
            directory = directories[i][path]
            hashes[i][directory['hashes']] = path
            paths[i][directory['paths']] = path

    print('Determining files to sync')
    # Every key read by the output loop below must exist here.
    plan = {
        'same files, different filenames': [],
        'same directory, different files': [],
        'same file, different directory': [],
        'same file, different filename': [],
        'unique directory': [],
        'unique file': []
    }
    for path in sorted(directories[0]):
        directory = directories[0][path]
        source_hashes = directory['hashes'].split('\n')
        # The directory may not exist on the other side at all.
        if path in directories[1]:
            target_hashes = directories[1][path]['hashes'].split('\n')
        else:
            target_hashes = []
        if directory['hashes'] in hashes[1]:
            # same files
            if directory['paths'] not in paths[1]:
                # different filenames
                rename = []
                for entry in directory['files']:
                    target_path = file_path_by_hash[1][entry['hash']]
                    if entry['path'] != target_path:
                        rename.append([entry['path'], target_path])
                plan['same files, different filenames'].append(rename)
            # These files are accounted for on both sides.
            for file_hash in source_hashes:
                for i in [0, 1]:
                    del file_path_by_hash[i][file_hash]
        else:
            for file_hash in source_hashes:
                if file_hash in target_hashes:
                    continue
                if file_hash in file_path_by_hash[1]:
                    plan['same file, different filename'].append([
                        file_path_by_hash[0][file_hash],
                        file_path_by_hash[1][file_hash]
                    ])
                    del file_path_by_hash[0][file_hash]
                    del file_path_by_hash[1][file_hash]
                else:
                    plan['unique file'].append([
                        file_path_by_hash[0][file_hash],
                        None
                    ])
                    del file_path_by_hash[0][file_hash]
            # Files that exist only on side b; the path comes from side b.
            for file_hash in target_hashes:
                if (
                    file_hash not in source_hashes
                    and file_hash in file_path_by_hash[1]
                    and file_hash not in file_path_by_hash[0]
                ):
                    plan['unique file'].append([
                        None,
                        file_path_by_hash[1][file_hash]
                    ])

    print('Writing %s' % FILES['sync'])
    indent = 4 * ' '
    data = ['[']
    data.append('')
    for key in [
        'same files, different filenames',
        'same directory, different files',
        'same file, different directory',
        'unique directory'
    ]:
        data.append('')
        data.append(indent + '/* %s */' % key)
        for entry in plan[key]:
            data.append('')
            if key == 'same files, different filenames':
                data.append(indent + '/* rename in b */')
                data.append(',\n'.join([indent + json.dumps(['b', 'rename'] + x) for x in entry]) + ',')
                data.append(indent + '/* rename in a */')
                data.append(',\n'.join([indent + '// ' + json.dumps(['a', 'rename'] + list(reversed(x))) for x in entry]) + ',')
            elif key == 'same directory, different files':
                data.append(indent + '/* remove in b, copy from a to b */')
                data.append(',\n'.join([indent + json.dumps(['b', 'remove', x]) for x in entry[1]]) + ',')
                data.append(',\n'.join([indent + json.dumps(['a', 'copy', x]) for x in entry[0]]) + ',')
                data.append(indent + '/* remove in a, copy from b to a */')
                data.append(',\n'.join([indent + '// ' + json.dumps(['a', 'remove', x]) for x in entry[0]]) + ',')
                data.append(',\n'.join([indent + '// ' + json.dumps(['b', 'copy', x]) for x in entry[1]]) + ',')
            elif key == 'same file, different directory':
                data.append(indent + '/* rename in b */')
                print(json.dumps(entry, indent=4))
                data.append(indent + json.dumps(['b', 'rename'] + entry) + ',')
                data.append(indent + '/* rename in a */')
                data.append(indent + '// ' + json.dumps(['a', 'rename'] + list(reversed(entry))) + ',')
            else:
                direction = ['a', 'b'] if entry[0] else ['b', 'a']
                i = 0 if entry[0] else 1
                data.append(indent + '/* copy from %s to %s */' % (direction[0], direction[1]))
                data.append(',\n'.join([indent + json.dumps([direction[0], 'copy', x]) for x in entry[i]]) + ',')
                data.append(indent + '/* remove in %s */' % direction[0])
                data.append('\n'.join([indent + '// ' + json.dumps([direction[0], 'remove', x]) for x in entry[i]]) + ',')
    # Drop the comma after the last real (non-comment) element so that no
    # trailing comma precedes the closing bracket.
    for index in range(len(data) - 1, -1, -1):
        last_line = data[index].split('\n')[-1].strip()
        if not last_line or last_line.startswith('/*') or last_line.startswith('//'):
            continue
        if data[index].endswith(','):
            data[index] = data[index][:-1]
        break
    data.append('')
    data.append(']')
    with open(FILES['sync'], 'w') as f:
        f.write('\n'.join(data))
|
|
|
|
|
|
def execute_sync():
    """Placeholder for executing the 'sync' action; currently does nothing."""
    return None
|
|
|
|
|
|
def update():
    """Placeholder for an 'update' action; currently does nothing."""
    return None
|
|
|
|
|
|
def execute_update():
    """Placeholder for executing an 'update' action; currently does nothing."""
    return None
|
|
|
|
|
|
def get_config():
    """Return the parsed contents of the config file FILES['config'].

    Exits the program with a '... not found' message if the file does not
    exist in the current directory.
    """
    config_file = FILES['config']
    if os.path.exists(config_file):
        with open(config_file) as f:
            return ox.jsonc.load(f)
    sys.exit('%s not found' % config_file)
|
|
|
|
|
|
if __name__ == '__main__':

    # Command line entry point: ``pandoraclient action [volume] [options]``.
    # Without -x the function named after the action runs; with -x the
    # matching ``execute_<action>`` function runs.
    actions = ['copy', 'organize', 'sync', 'upload']
    action_string = '%s or %s' % (', '.join(actions[:-1]), actions[-1])
    usage = 'usage: %prog action [volume] [options]'
    description = 'Action: %s' % action_string
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-e', '--encode', action='store_true', dest='encode',
        help='encode only, do not upload'
    )
    parser.add_option(
        '-v', '--verbose', action='store_true', dest='verbose',
        help='verbose output'
    )
    parser.add_option(
        '-x', '--execute', action='store_true', dest='execute',
        help='execute %s' % action_string
    )
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit(parser.get_usage())
    action = args[0]
    args = args[1:]
    if action not in actions:
        # Unknown actions used to be ignored silently.
        sys.exit(parser.get_usage())
    handler_name = 'execute_%s' % action if opts.execute else action
    # Looked up by name so that an action without an implementation in this
    # module gives a clear message instead of a NameError traceback.
    handler = globals().get(handler_name)
    if handler is None:
        sys.exit('%s is not implemented' % handler_name)
    handler()
|