#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2012
from __future__ import division, with_statement
import datetime
import json
from optparse import OptionParser
import os
import ox
import re
import shutil
import sys
import unicodedata
FILES = {
    'config': 'pandoraclient.config.jsonc',
    'errors': 'pandoraclient.errors.json',
    'files': 'pandoraclient.files.%s.json',
    'organize': 'pandoraclient.organize.jsonc',
    'sync': 'pandoraclient.sync.jsonc'
}

DIRECTORIES = ['Extras', 'Segments', 'Versions']
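
# The config file is assumed to look roughly like this (shape inferred from
# get_config() and the code below, which only reads 'url' and 'volumes'):
#
# {
#     "url": "https://pandora.example.com/api/",
#     "volumes": {"myvolume": "/media/myvolume/Movies/"}
# }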

def copy():
    pass

def execute_copy():
    pass

def organize():
    def add_directory(path):
        # prepend the top-level directory whose sort range contains path
        d = directories[-1]
        for i, directory in enumerate(directories):
            if ox.sort_string(path).lower() < ox.sort_string(directory).lower():
                d = directories[max(i - 1, 0)]
                break
        return '%s/%s' % (d, path)
    def remove_directory(path):
        # strip the top-level directory from path
        return '/'.join(path.split('/')[1:])
    def is_system_file(filename):
        return re.search(r'^\.(_|DS_Store$)', filename) is not None
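    # Volume layout, as assumed throughout: paths are three directories deep,
    # <top-level>/<group>/<item>/<file>, with optional Extras/Segments/Versions
    # subdirectories inside each item directory.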
    config = get_config()
    volume = config['volumes'].keys()[0]
    volume_path = config['volumes'][volume]
    if isinstance(volume_path, str):
        volume_path = volume_path.decode('utf-8')
    if volume_path[-1] != '/':
        volume_path += '/'
    api = ox.API(config['url'])
    FILES['files'] = FILES['files'] % volume
    cache = {}
    if os.path.exists(FILES['files']):
        data = json.load(open(FILES['files']))
        if data['path'] == volume_path:
            for file in data['files']:
                cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash']
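    # The cache maps "path size mtime" to a previously computed hash, so only
    # new or modified files have to be re-hashed with ox.oshash() below.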
    directories_by_path = {}
    files = []
    files_by_hash = {}
    files_by_item = {}
    files_by_path = {}
    issues = {
        'duplicate files': [],
        'empty directories': [],
        'incorrect filenames': [],
        'incorrect filenames (target exists)': [],
        'missing files': [],
        'missing subtitles': [],
        'missing video': [],
        'multiple versions': [],
        'non-canonical filenames': [],
        'non-canonical filenames (target exists)': [],
        'system files': [],
        'unexpected directories': [],
        'unexpected files': [],
        'unknown extensions': []
    }
    previous_path = ''
    print 'Scanning %s' % volume_path
    directories = sorted(os.listdir(volume_path))
    for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
        relative_path = unicodedata.normalize('NFD', absolute_path)[len(volume_path):]
        parts = relative_path.split('/')
        length = len(parts)
        for filename in ox.sorted_strings(filenames):
            full_path = os.path.join(absolute_path, filename)
            filename = unicodedata.normalize('NFD', filename)
            if relative_path != previous_path and length == 3:
                print relative_path
                previous_path = relative_path
            path = os.path.join(relative_path, filename)
            if is_system_file(filename):
                issues['system files'].append(path)
            else:
                # unexpected files, unexpected directories
                parts = path.split('/')
                if length < 3:
                    issues['unexpected files'].append(path)
                if length <= 3:
                    file = {}
                    file['path'] = path
                    file['size'] = os.path.getsize(full_path)
                    file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
                    key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
                    file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
                    files.append(file)
                    if not file['hash'] in files_by_hash:
                        files_by_hash[file['hash']] = []
                    files_by_hash[file['hash']].append(file)
                    if not relative_path in files_by_path:
                        files_by_path[relative_path] = []
                    files_by_path[relative_path].append(path)
                elif parts[3] in DIRECTORIES:
                    path_key = '/'.join(parts[:3])
                    if not path_key in directories_by_path:
                        directories_by_path[path_key] = []
                    directory = '/'.join(parts[:4])
                    if not directory in directories_by_path[path_key]:
                        directories_by_path[path_key].append(directory)
                else:
                    issues['unexpected directories'].append(path)
        # empty directories, missing files
        if not filenames or not filter(lambda x: not is_system_file(x), filenames):
            if not dirnames:
                issues['empty directories'].append(relative_path)
            elif length == 3:
                issues['missing files'].append(relative_path)
    print 'Scanning for duplicates'
    for hash in files_by_hash:
        if len(files_by_hash[hash]) > 1:
            issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
    print 'Scanning for inconsistencies'
    paths = api.getPath({'id': files_by_hash.keys()})['data']
    for file in files:
        file = dict(file, **ox.movie.parse_path(remove_directory(file['path'])))
        file['normalizedPath'] = add_directory(file['normalizedPath'])
        # unknown extensions
        if not file['type']:
            issues['unknown extensions'].append(file['path'])
        path_key = 'normalizedPath'
        if file['hash'] in paths and paths[file['hash']] != remove_directory(file['path']):
            file['correctedPath'] = add_directory(paths[file['hash']])
            path_key = 'correctedPath'
        if len(file['path'].split('/')) == 4:
            file['item'] = '%s/%s' % ('/'.join(file[path_key].split('/')[:-1]), file['title'])
            if not file['item'] in files_by_item:
                files_by_item[file['item']] = []
            files_by_item[file['item']].append(file)
    for item in sorted(files_by_item):
        item_files = files_by_item[item]
        # missing video, missing subtitles, multiple versions
        versions = ox.movie.parse_item_files(item_files)
        main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
        other_files = sum([version['files'] for version in versions if not version['isMainVersion']], [])
        if not main_files:
            issues['missing video'].append([file['path'] for file in item_files])
        else:
            video_files = [file for file in main_files if file['type'] == 'video']
            subtitle_files = [file for file in main_files if file['type'] == 'subtitle']
            if subtitle_files and len(subtitle_files) < len(video_files):
                issues['missing subtitles'].append([file['path'] for file in main_files])
        if other_files:
            issues['multiple versions'].append([file['path'] for file in main_files + other_files])
        # incorrect filenames, non-canonical filenames
        for version in versions:
            path_key = {}
            for file in version['files']:
                path_key[file['path']] = 'correctedPath' if 'correctedPath' in file else 'normalizedPath'
            rename = [[
                file['path'], file[path_key[file['path']]]
            ] for file in version['files'] if file['path'] != file[path_key[file['path']]]]
            languages = {'idx': [], 'srt': [], 'sub': []}
            for extension in languages:
                languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension])
                if len(languages[extension]) == 1 and 'en' in languages[extension]:
                    # only english subtitles
                    regexp = r'\.en(?=\.%s$)' % extension
                    # don't add '.en'
                    rename = [paths for paths in rename if not re.search(regexp, paths[1])]
                    # remove '.en'
                    rename += [[
                        file['path'], re.sub(regexp, '', file[path_key[file['path']]])
                    ] for file in version['files'] if re.search(regexp, file[path_key[file['path']]]) and file['path'] != re.sub(regexp, '', file[path_key[file['path']]])]
            for paths in rename:
                issues['%s filenames' % (
                    'incorrect' if path_key[paths[0]] == 'correctedPath' else 'non-canonical'
                )].append(paths)
    print 'Scanning for conflicts'
    existing_paths = [file['path'] for file in files]
    for key in ['incorrect filenames', 'non-canonical filenames']:
        exists_key = '%s (target exists)' % key
        path_count = {}
        for path in [paths[1] for paths in issues[key]]:
            path_count[path] = 1 if not path in path_count else path_count[path] + 1
        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
            if path_count[paths[1]] > 1:
                # multiple files with the same target path
                issues[key].remove(paths)
                if not paths in issues[exists_key]:
                    issues[exists_key].append(paths)
            elif not paths[1] in existing_paths:
                # target path does not exist, remove original path from existing paths
                existing_paths.remove(paths[0])
        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
            if paths[1] in existing_paths:
                # target path exists
                issues[key].remove(paths)
                if not paths in issues[exists_key]:
                    issues[exists_key].append(paths)
    # orphaned directories
    new_path = {}
    rename_key = {}
    for key in ['incorrect filenames', 'non-canonical filenames']:
        for paths in issues[key]:
            new_path[paths[0]] = '/'.join(paths[1].split('/')[:-1])
            if not new_path[paths[0]] in rename_key:
                rename_key[new_path[paths[0]]] = key
    for path in [path for path in files_by_path if path in directories_by_path]:
        new_paths = []
        for path_file in files_by_path[path]:
            if path_file in new_path:
                new_paths.append(new_path[path_file])
        if len(new_paths) == len(files_by_path[path]) and len(set(new_paths)) == 1 and new_paths[0] != path:
            # per path, if all files get moved to the same path, move directories too
            for directory in directories_by_path[path]:
                new_directory = os.path.join(new_paths[0], directory.split('/')[-1])
                key = rename_key[new_paths[0]]
                exists_key = '%s (target exists)' % key
                issues[exists_key if os.path.exists(os.path.join(volume_path, new_directory)) else key].append([directory, new_directory])
    for key in ['incorrect filenames', 'non-canonical filenames']:
        for key in [key, '%s (target exists)' % key]:
            issues[key] = sorted(issues[key], key=lambda x: x[0].lower())
    for issue in issues:
        if issues[issue]:
            if isinstance(issues[issue][0], basestring):
                issues[issue] = sorted(issues[issue])
            else:
                issues[issue] = sorted(issues[issue], key=lambda x: x[0])
    keys = {
        'automatic': {
            'remove': [
                'empty directories',
                'system files',
                'unexpected files',
                'unknown extensions'
            ],
            'rename': [
                'incorrect filenames',
                'non-canonical filenames'
            ]
        },
        'manual': {
            'rename': [
                'incorrect filenames (target exists)',
                'non-canonical filenames (target exists)'
            ],
            'resolve': [
                'duplicate files',
                'missing files',
                'missing subtitles',
                'missing video',
                'multiple versions',
                'unexpected directories'
            ]
        }
    }
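    # Only the 'automatic' section is applied by execute_organize() below;
    # 'manual' issues are written out for review and have to be resolved by hand.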
    print 'Writing %s' % FILES['files']
    data = ['{']
    data.append(4 * ' ' + '"directories": ' + get_json(directories) + ',')
    data.append(4 * ' ' + '"files": [')
    for f, file in enumerate(files):
        data.append(8 * ' ' + get_json({
            'hash': file['hash'],
            'path': file['path'],
            'size': file['size'],
            'time': file['time']
        }, sort_keys=True) + (',' if f < len(files) - 1 else ''))
    data.append(4 * ' ' + '],')
    data.append(4 * ' ' + '"path": ' + get_json(volume_path) + ',')
    data.append(4 * ' ' + '"totals": {"files": %d, "size": %d' % (
        len(files), sum([file['size'] for file in files])
    ) + '}')
    data.append('}')
    write_file(FILES['files'], u'\n'.join(data))
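    # The file written above has this shape:
    # {
    #     "directories": [...],
    #     "files": [{"hash": "...", "path": "...", "size": ..., "time": "..."}, ...],
    #     "path": "<volume path>",
    #     "totals": {"files": ..., "size": ...}
    # }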
    print 'Writing %s' % FILES['organize']
    data = ['{']
    sections = sorted(keys)
    for s, section in enumerate(sections):
        data.append('')
        data.append(4 * ' ' + '"%s": {' % section)
        actions = sorted(keys[section])
        for a, action in enumerate(actions):
            data.append('')
            data.append(8 * ' ' + '"%s": [' % action)
            issue_keys = keys[section][action]
            for i, issue in enumerate(issue_keys):
                data.append('')
                data.append(12 * ' ' + '/* %s */' % issue)
                if issues[issue]:
                    for line in get_json(issues[issue], indent=4).split('\n')[1:-1]:
                        data.append(8 * ' ' + line)
                    if i < len(issue_keys) - 1:
                        data[-1] += ','
            data.append('')
            data.append(8 * ' ' + ']' + (',' if a < len(actions) - 1 else ''))
        data.append('')
        data.append(4 * ' ' + '}' + (',' if s < len(sections) - 1 else ''))
    data.append('}')
    write_file(FILES['organize'], u'\n'.join(data))
    print 'Next, edit %s and run pandoraclient organize -x' % FILES['organize']

def execute_organize():
    def get_empty_directories():
        empty_directories = []
        for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
            if not dirnames and not filenames:
                empty_directories.append(absolute_path)
        return empty_directories
    def remove_file(path):
        print 'Removing "%s"' % path
        try:
            if os.path.isdir(path):
                os.rmdir(path)
            else:
                os.remove(path)
        except:
            raise IOError('Could not remove file')
    def rename_file(source, target):
        print 'Renaming "%s" to "%s"' % (source, target)
        if not os.path.exists(source):
            raise IOError('Source does not exist')
        elif os.path.exists(target):
            raise IOError('Target exists')
        else:
            ox.makedirs(os.path.dirname(target))
            shutil.move(source, target)
    config = get_config()
    volume = config['volumes'].keys()[0]
    volume_path = config['volumes'][volume]
    if not os.path.exists(FILES['organize']):
        sys.exit('%s not found' % FILES['organize'])
    data = ox.jsonc.load(open(FILES['organize']))
    old_empty_directories = get_empty_directories()
    remove = map(lambda x: os.path.join(volume_path, x), data['automatic']['remove'])
    rename = map(lambda x: map(lambda y: os.path.join(volume_path, y), x), data['automatic']['rename'])
    errors = []
    for path in remove:
        try:
            remove_file(path)
        except:
            errors.append('Could not remove "%s"' % path)
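    # Renames are applied in two passes via a temporary '.pandora' suffix,
    # presumably so that case-only renames and swapped targets don't collide
    # before all sources have been moved out of the way.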
    for paths in rename:
        source = paths[0]
        target = paths[1] + '.pandora'
        try:
            rename_file(source, target)
        except IOError as error:
            errors.append('Could not rename "%s" to "%s" (%s)' % (source, target, error))
    for paths in rename:
        source = paths[1] + '.pandora'
        target = paths[1]
        try:
            rename_file(source, target)
        except IOError as error:
            errors.append('Could not rename "%s" to "%s" (%s)' % (source, target, error))
    while True:
        new_empty_directories = [path for path in get_empty_directories() if not path in old_empty_directories]
        if new_empty_directories:
            for path in new_empty_directories:
                try:
                    remove_file(path)
                except:
                    errors.append('Could not remove "%s"' % path)
        else:
            break
    for error in errors:
        print error

def sync():
    def get_directories(files):
        directories = {}
        for file in files:
            # path = os.path.dirname(file['path'])
            path = '/'.join(file['path'].split('/')[:3])
            if not path in directories:
                directories[path] = {'files': []}
            directories[path]['files'].append(file)
        for path in directories:
            directories[path]['hashes'] = '\n'.join(sorted(
                [file['hash'] for file in directories[path]['files']]
            ))
            directories[path]['paths'] = '\n'.join(sorted(
                [file['path'] for file in directories[path]['files']]
            ))
        return directories
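    # Files are grouped by item directory (the first three path components);
    # 'hashes' and 'paths' are sorted, newline-joined fingerprints used below to
    # find directories with identical content or identical filenames.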
    if len(sys.argv) != 3:
        print 'Usage: %s sync targetfiles.json' % sys.argv[0]
        sys.exit(1)
    print 'Reading files'
    config = get_config()
    volume = config['volumes'].keys()[0]
    source_file = FILES['files'] % volume
    target_file = sys.argv[2]
    files = [json.load(open(file))['files'] for file in [source_file, target_file]]
    directories = [get_directories(file) for file in files]
    file_path_by_hash = [{}, {}]
    hashes = [{}, {}]
    paths = [{}, {}]
    for i in [0, 1]:
        for file in files[i]:
            if file['hash'] in file_path_by_hash[i]:
                print 'Duplicate file:'
                print file_path_by_hash[i][file['hash']]
                print file['path']
                sys.exit()
            file_path_by_hash[i][file['hash']] = file['path']
        for path in sorted(directories[i]):
            directory = directories[i][path]
            hashes[i][directory['hashes']] = path
            paths[i][directory['paths']] = path
    print 'Determining files to sync'
    sync = {
        'same files, different filenames': [],
        'same file, different filename': [],
        'unique directory': [],
        'unique file': []
    }
    for path in sorted(directories[0]):
        directory = directories[0][path]
        file_hashes = [
            directory['hashes'].split('\n'),
            directories[1][path]['hashes'].split('\n') if path in directories[1] else []
        ]
        if directory['hashes'] in hashes[1]:
            # same files
            if not directory['paths'] in paths[1]:
                # different filenames
                rename = []
                for file in directory['files']:
                    if file['path'] != file_path_by_hash[1][file['hash']]:
                        rename.append([file['path'], file_path_by_hash[1][file['hash']]])
                sync['same files, different filenames'].append(rename)
            for hash in file_hashes[0]:
                for i in [0, 1]:
                    del file_path_by_hash[i][hash]
        else:
            for hash in file_hashes[0]:
                if not hash in file_hashes[1]:
                    if hash in file_path_by_hash[1]:
                        sync['same file, different filename'].append([
                            file_path_by_hash[0][hash],
                            file_path_by_hash[1][hash]
                        ])
                        del file_path_by_hash[0][hash]
                        del file_path_by_hash[1][hash]
                    else:
                        sync['unique file'].append([
                            file_path_by_hash[0][hash],
                            None
                        ])
                        del file_path_by_hash[0][hash]
            for hash in [hash for hash in file_hashes[1] if hash in file_path_by_hash[1]]:
                sync['unique file'].append([
                    None,
                    file_path_by_hash[1][hash]
                ])
    # disabled alternative matching logic:
    '''
    elif path in directories[1] and directory['hashes'] != directories[1][path]['hashes']:
        # same directory, different files
        file_hashes_1 = directories[1][path]['hashes'].split('\n')
        if file_hashes_1[0] in file_path_by_hash[1]:
            # directory in b not handled in previous step
            sync['same directory, different files'].append([
                [file['path'] for file in directory['files']],
                [file['path'] for file in directories[1][path]['files']]
            ])
            for hash in file_hashes:
                del file_path_by_hash[0][hash]
            for hash in file_hashes_1:
                del file_path_by_hash[1][hash]
    elif len(
        [hash for hash in file_hashes if hash in file_path_by_hash[0] and not hash in file_path_by_hash[1]]
    ) == len(file_hashes):
        sync['unique directory'].append([directory['files'], None])
        for hash in file_hashes:
            del file_path_by_hash[0][hash]
    for path in sorted(directories[1]):
        directory = directories[1][path]
        file_hashes = directory['hashes'].split('\n')
        if len(
            [hash for hash in file_hashes if hash in file_path_by_hash[1] and not hash in file_path_by_hash[0]]
        ) == len(file_hashes):
            sync['unique directory'].append([None, directory['paths']])
            for hash in file_hashes:
                del file_path_by_hash[1][hash]
    for hash in file_path_by_hash[0]:
        if hash in file_path_by_hash[1]:
            sync['same file, different filenames'].append(
                [file_path_by_hash[0][hash], file_path_by_hash[1][hash]]
            )
    '''
    print 'Writing %s' % FILES['sync']
    data = ['[']
    data.append('')
    for key in [
        'same files, different filenames',
        'same directory, different files',
        'same file, different filename',
        'unique directory',
        'unique file'
    ]:
        data.append('')
        data.append(4 * ' ' + '/* %s */' % key)
        # 'same directory, different files' is only produced by the disabled
        # matching logic above, so it defaults to an empty list here
        for paths in sync.get(key, []):
            data.append('')
            if key == 'same files, different filenames':
                data.append(4 * ' ' + '/* rename in b */')
                data.append(',\n'.join([4 * ' ' + get_json(['b', 'rename'] + x) for x in paths]) + ',')
                data.append(4 * ' ' + '/* rename in a */')
                data.append(',\n'.join([4 * ' ' + '// ' + get_json(['a', 'rename'] + list(reversed(x))) for x in paths]) + ',')
            elif key == 'same directory, different files':
                data.append(4 * ' ' + '/* remove in b, copy from a to b */')
                data.append(',\n'.join([4 * ' ' + get_json(['b', 'remove', x]) for x in paths[1]]) + ',')
                data.append(',\n'.join([4 * ' ' + get_json(['a', 'copy', x]) for x in paths[0]]) + ',')
                data.append(4 * ' ' + '/* remove in a, copy from b to a */')
                data.append(',\n'.join([4 * ' ' + '// ' + get_json(['a', 'remove', x]) for x in paths[0]]) + ',')
                data.append(',\n'.join([4 * ' ' + '// ' + get_json(['b', 'copy', x]) for x in paths[1]]) + ',')
            elif key == 'same file, different filename':
                data.append(4 * ' ' + '/* rename in b */')
                #print get_json(paths, indent=4).encode('utf-8')
                data.append(4 * ' ' + get_json(['b', 'rename'] + paths) + ',')
                data.append(4 * ' ' + '/* rename in a */')
                data.append(4 * ' ' + '// ' + get_json(['a', 'rename'] + list(reversed(paths))) + ',')
            else:
                # 'unique directory' entries hold lists of paths, 'unique file'
                # entries a single path (with None on the other side)
                copy = ['a', 'b'] if paths[0] else ['b', 'a']
                i = 0 if paths[0] else 1
                unique_paths = paths[i] if isinstance(paths[i], list) else [paths[i]]
                data.append(4 * ' ' + '/* copy from %s to %s */' % (copy[0], copy[1]))
                data.append(',\n'.join([4 * ' ' + get_json([copy[0], 'copy', x]) for x in unique_paths]) + ',')
                data.append(4 * ' ' + '/* remove in %s */' % copy[0])
                data.append(',\n'.join([4 * ' ' + '// ' + get_json([copy[0], 'remove', x]) for x in unique_paths]) + ',')
    data[-1] = data[-1][:-1]
    data.append('')
    data.append(']')
    write_file(FILES['sync'], u'\n'.join(data))
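    # Each entry in the file written above is an ["a"|"b", action, path(s)]
    # tuple, e.g. ["b", "rename", "old/path", "new/path"]; the commented-out
    # '//' lines are the inverse operations, to be enabled by hand if the sync
    # should go the other way.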

def execute_sync():
    pass

def update():
    pass

def execute_update():
    pass

def get_config():
    if not os.path.exists(FILES['config']):
        sys.exit('%s not found' % FILES['config'])
    with open(FILES['config']) as f:
        config = ox.jsonc.load(f)
    return config

def get_json(data, indent=None, sort_keys=False):
    # return json.dumps(data, ensure_ascii=False, indent=indent, sort_keys=sort_keys)
    return json.dumps(data, indent=indent, sort_keys=sort_keys)

def write_file(filename, data):
    with open(filename, 'w') as f:
        # f.write(data.encode('utf-8'))
        f.write(data)

if __name__ == '__main__':
    actions = ['copy', 'organize', 'sync', 'update']
    action_string = '%s or %s' % (', '.join(actions[:-1]), actions[-1])
    usage = 'usage: %prog action [volume] [options]'
    description = 'Action: %s' % action_string
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-e', '--encode', action='store_true', dest='encode',
        help='encode only, do not upload'
    )
    parser.add_option(
        '-v', '--verbose', action='store_true', dest='verbose',
        help='verbose output'
    )
    parser.add_option(
        '-x', '--execute', action='store_true', dest='execute',
        help='execute %s' % action_string
    )
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit(parser.get_usage())
    action = args[0]
    args = args[1:]
    if action == 'copy':
        if not opts.execute:
            copy()
        else:
            execute_copy()
    elif action == 'organize':
        if not opts.execute:
            organize()
        else:
            execute_organize()
    elif action == 'sync':
        if not opts.execute:
            sync()
        else:
            execute_sync()
    elif action == 'update':
        if not opts.execute:
            update()
        else:
            execute_update()
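
# Typical workflow, as suggested by the prompts above:
#     pandoraclient organize      # scan the volume, write pandoraclient.organize.jsonc
#     (edit pandoraclient.organize.jsonc)
#     pandoraclient organize -x   # apply the edited changes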