#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2012

from __future__ import division, with_statement

import datetime
import json
from optparse import OptionParser
import os
import ox
import re
import shutil
import sys
import unicodedata

FILES = {
    'config': 'pandoraclient.config.jsonc',
    'errors': 'pandoraclient.errors.json',
    'files': 'pandoraclient.files.%s.json',
    'organize': 'pandoraclient.organize.jsonc',
    'sync': 'pandoraclient.sync.jsonc'
}

DIRECTORIES = ['Extras', 'Segments', 'Versions']
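
# Expected volume layout (illustrative paths; the canonical naming scheme is
# the one implemented by ox.movie): three directory levels -- sort letter,
# director, title -- with optional Extras/Segments/Versions subdirectories
# per item, e.g.:
#
#     A/Author, Ann/A Title (2012)/A Title.avi
#     A/Author, Ann/A Title (2012)/A Title.en.srt
#     A/Author, Ann/A Title (2012)/Extras/An Extra.avi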

def copy():
    pass

def execute_copy():
    pass

def organize():

    def add_directory(path):
        d = directories[-1]
        for i, directory in enumerate(directories):
            if ox.sort_string(path).lower() < ox.sort_string(directory).lower():
                d = directories[max(i - 1, 0)]
                break
        return '%s/%s' % (d, path)

    def remove_directory(path):
        return '/'.join(path.split('/')[1:])

    def is_system_file(filename):
        return re.search(r'^\.(_|DS_Store$)', filename) is not None

    config = get_config()
    volume = config['volumes'].keys()[0]
    volume_path = config['volumes'][volume]
    if isinstance(volume_path, str):
        volume_path = volume_path.decode('utf-8')
    if volume_path[-1] != '/':
        volume_path += '/'
    api = ox.API(config['url'])
    FILES['files'] = FILES['files'] % volume
    cache = {}
    if os.path.exists(FILES['files']):
        data = json.load(open(FILES['files']))
        if data['path'] == volume_path:
            for file in data['files']:
                cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash']
    directories_by_path = {}
    files = []
    files_by_hash = {}
    files_by_item = {}
    files_by_path = {}
    issues = {
        'duplicate files': [],
        'empty directories': [],
        'incorrect filenames': [],
        'incorrect filenames (target exists)': [],
        'missing files': [],
        'missing subtitles': [],
        'missing video': [],
        'multiple versions': [],
        'non-canonical filenames': [],
        'non-canonical filenames (target exists)': [],
        'system files': [],
        'unexpected directories': [],
        'unexpected files': [],
        'unknown extensions': []
    }
    previous_path = ''
    print 'Scanning %s' % volume_path
    directories = sorted(os.listdir(volume_path))
    for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
        relative_path = unicodedata.normalize('NFD', absolute_path)[len(volume_path):]
        parts = relative_path.split('/')
        length = len(parts)
        for filename in ox.sorted_strings(filenames):
            full_path = os.path.join(absolute_path, filename)
            filename = unicodedata.normalize('NFD', filename)
            if relative_path != previous_path and length == 3:
                print relative_path
                previous_path = relative_path
            path = os.path.join(relative_path, filename)
            if is_system_file(filename):
                issues['system files'].append(path)
            else:
                # unexpected files, unexpected directories
                parts = path.split('/')
                if length < 3:
                    issues['unexpected files'].append(path)
                if length <= 3:
                    file = {}
                    file['path'] = path
                    file['size'] = os.path.getsize(full_path)
                    file['time'] = datetime.datetime.utcfromtimestamp(
                        os.stat(full_path).st_mtime
                    ).isoformat()
                    key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
                    file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
                    files.append(file)
                    if not file['hash'] in files_by_hash:
                        files_by_hash[file['hash']] = []
                    files_by_hash[file['hash']].append(file)
                    if not relative_path in files_by_path:
                        files_by_path[relative_path] = []
                    files_by_path[relative_path].append(path)
                elif parts[3] in DIRECTORIES:
                    path_key = '/'.join(parts[:3])
                    if not path_key in directories_by_path:
                        directories_by_path[path_key] = []
                    directory = '/'.join(parts[:4])
                    if not directory in directories_by_path[path_key]:
                        directories_by_path[path_key].append(directory)
                else:
                    issues['unexpected directories'].append(path)
        # empty directories, missing files
        if not filenames or not filter(lambda x: not is_system_file(x), filenames):
            if not dirnames:
                issues['empty directories'].append(relative_path)
            elif length == 3:
                issues['missing files'].append(relative_path)
    print 'Scanning for duplicates'
    for hash in files_by_hash:
        if len(files_by_hash[hash]) > 1:
            issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
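    # Note on hashing: ox.oshash computes the 64-bit OpenSubtitles-style hash
    # (file size plus the first and last 64 kB), so duplicates are grouped by
    # content, not by name; the cache loaded above avoids rehashing files
    # whose path, size and mtime are unchanged since the last run.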
    print 'Scanning for inconsistencies'
    corrected_paths = api.getPath({'id': files_by_hash.keys()})['data']
    for file in files:
        file = dict(file, **ox.movie.parse_path(remove_directory(file['path'])))
        file['normalizedPath'] = add_directory(file['normalizedPath'])
        # unknown extensions
        if not file['type']:
            issues['unknown extensions'].append(file['path'])
        path_key = 'normalizedPath'
        if file['hash'] in corrected_paths and corrected_paths[file['hash']] != remove_directory(file['path']):
            file['correctedPath'] = add_directory(corrected_paths[file['hash']])
            path_key = 'correctedPath'
        if len(file['path'].split('/')) == 4:
            file['item'] = '%s/%s' % ('/'.join(file[path_key].split('/')[:-1]), file['title'])
            if not file['item'] in files_by_item:
                files_by_item[file['item']] = []
            files_by_item[file['item']].append(file)
    for item in sorted(files_by_item):
        item_files = files_by_item[item]
        # missing video, missing subtitles, multiple versions
        versions = ox.movie.parse_item_files(item_files)
        main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
        other_files = sum([version['files'] for version in versions if not version['isMainVersion']], [])
        if not main_files:
            issues['missing video'].append([file['path'] for file in item_files])
        else:
            video_files = [file for file in main_files if file['type'] == 'video']
            subtitle_files = [file for file in main_files if file['type'] == 'subtitle']
            if subtitle_files and len(subtitle_files) < len(video_files):
                issues['missing subtitles'].append([file['path'] for file in main_files])
            if other_files:
                issues['multiple versions'].append([file['path'] for file in main_files + other_files])
        # incorrect filenames, non-canonical filenames
        for version in versions:
            path_key = {}
            for file in version['files']:
                path_key[file['path']] = 'correctedPath' if 'correctedPath' in file else 'normalizedPath'
            rename = [[
                file['path'], file[path_key[file['path']]]
            ] for file in version['files'] if file['path'] != file[path_key[file['path']]]]
            languages = {'idx': [], 'srt': [], 'sub': []}
            for extension in languages:
                languages[extension] = set([
                    file['language'] for file in version['files'] if file['extension'] == extension
                ])
                if len(languages[extension]) == 1 and 'en' in languages[extension]:
                    # only english subtitles
                    regexp = r'\.en(?=\.%s$)' % extension
                    # don't add '.en'
                    rename = [paths for paths in rename if not re.search(regexp, paths[1])]
                    # remove '.en'
                    rename += [[
                        file['path'], re.sub(regexp, '', file[path_key[file['path']]])
                    ] for file in version['files'] if re.search(regexp, file[path_key[file['path']]])
                        and file['path'] != re.sub(regexp, '', file[path_key[file['path']]])]
            for paths in rename:
                issues['%s filenames' % (
                    'incorrect' if path_key[paths[0]] == 'correctedPath' else 'non-canonical'
                )].append(paths)
    print 'Scanning for conflicts'
    existing_paths = [file['path'] for file in files]
    for key in ['incorrect filenames', 'non-canonical filenames']:
        exists_key = '%s (target exists)' % key
        path_count = {}
        for path in [paths[1] for paths in issues[key]]:
            path_count[path] = 1 if not path in path_count else path_count[path] + 1
        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
            if path_count[paths[1]] > 1:
                # multiple files with the same target path
                issues[key].remove(paths)
                if not paths in issues[exists_key]:
                    issues[exists_key].append(paths)
            elif not paths[1] in existing_paths:
                # target path does not exist, remove original path from existing paths
                existing_paths.remove(paths[0])
        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
            if paths[1] in existing_paths:
                # target path exists
                issues[key].remove(paths)
                if not paths in issues[exists_key]:
                    issues[exists_key].append(paths)
    # orphaned directories
    new_path = {}
    rename_key = {}
    for key in ['incorrect filenames', 'non-canonical filenames']:
        for i, paths in enumerate(issues[key]):
            new_path[paths[0]] = '/'.join(paths[1].split('/')[:-1])
            if not new_path[paths[0]] in rename_key:
                rename_key[new_path[paths[0]]] = key
    for path in [path for path in files_by_path if path in directories_by_path]:
        new_paths = []
        for path_file in files_by_path[path]:
            if path_file in new_path:
                new_paths.append(new_path[path_file])
        if len(new_paths) == len(files_by_path[path]) and len(set(new_paths)) == 1 and new_paths[0] != path:
            # per path, if all files get moved to the same path, move directories too
            for directory in directories_by_path[path]:
                new_directory = os.path.join(new_paths[0], directory.split('/')[-1])
                key = rename_key[new_paths[0]]
                exists_key = '%s (target exists)' % key
                issues[exists_key if os.path.exists(new_directory) else key].append([directory, new_directory])
    for key in ['incorrect filenames', 'non-canonical filenames']:
        for sort_key in [key, '%s (target exists)' % key]:
            issues[sort_key] = sorted(issues[sort_key], key=lambda x: x[0].lower())
    for issue in issues:
        if issues[issue]:
            if isinstance(issues[issue][0], basestring):
                issues[issue] = sorted(issues[issue])
            else:
                issues[issue] = sorted(issues[issue], key=lambda x: x[0])
    keys = {
        'automatic': {
            'remove': [
                'empty directories', 'system files', 'unexpected files', 'unknown extensions'
            ],
            'rename': [
                'incorrect filenames', 'non-canonical filenames'
            ]
        },
        'manual': {
            'rename': [
                'incorrect filenames (target exists)', 'non-canonical filenames (target exists)'
            ],
            'resolve': [
                'duplicate files', 'missing files', 'missing subtitles', 'missing video',
                'multiple versions', 'unexpected directories'
            ]
        }
    }
    print 'Writing %s' % FILES['files']
    data = ['{']
    data.append(4 * ' ' + '"directories": ' + get_json(directories) + ',')
    data.append(4 * ' ' + '"files": [')
    for f, file in enumerate(files):
        data.append(8 * ' ' + get_json({
            'hash': file['hash'],
            'path': file['path'],
            'size': file['size'],
            'time': file['time']
        }, sort_keys=True) + (',' if f < len(files) - 1 else ''))
    data.append(4 * ' ' + '],')
    data.append(4 * ' ' + '"path": ' + get_json(volume_path) + ',')
    data.append(4 * ' ' + '"totals": {"files": %d, "size": %d' % (
        len(files), sum([file['size'] for file in files])
    ) + '}')
    data.append('}')
    write_file(FILES['files'], u'\n'.join(data))
    print 'Writing %s' % FILES['organize']
    data = ['{']
    sections = sorted(keys)
    for s, section in enumerate(sections):
        data.append('')
        data.append(4 * ' ' + '"%s": {' % section)
        actions = sorted(keys[section])
        for a, action in enumerate(actions):
            data.append('')
            data.append(8 * ' ' + '"%s": [' % action)
            issue_keys = keys[section][action]
            for i, issue in enumerate(issue_keys):
                data.append('')
                data.append(12 * ' ' + '/* %s */' % issue)
                if issues[issue]:
                    for line in get_json(issues[issue], indent=4).split('\n')[1:-1]:
                        data.append(8 * ' ' + line)
                    if i < len(issue_keys) - 1:
                        data[-1] += ','
            data.append('')
            data.append(8 * ' ' + ']' + (',' if a < len(actions) - 1 else ''))
        data.append('')
        data.append(4 * ' ' + '}' + (',' if s < len(sections) - 1 else ''))
    data.append('}')
    write_file(FILES['organize'], u'\n'.join(data))
    print 'Next, edit %s and run pandoraclient organize -x' % FILES['organize']
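
# pandoraclient.organize.jsonc, as written above and consumed by
# execute_organize() below, is a commented-JSON file along these lines
# (paths are illustrative):
#
# {
#     "automatic": {
#         "remove": [
#             /* system files */
#             "A/Author, Ann/A Title (2012)/.DS_Store"
#         ],
#         "rename": [
#             /* non-canonical filenames */
#             ["A/Author, Ann/A Title (2012)/a title.avi",
#                 "A/Author, Ann/A Title (2012)/A Title.avi"]
#         ]
#     },
#     "manual": {...}
# }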

def execute_organize():

    def get_empty_directories():
        empty_directories = []
        for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
            if not dirnames and not filenames:
                empty_directories.append(absolute_path)
        return empty_directories

    def remove_file(path):
        print 'Removing "%s"' % path
        try:
            if os.path.isdir(path):
                os.rmdir(path)
            else:
                os.remove(path)
        except:
            raise IOError('Could not remove file')

    def rename_file(source, target):
        print 'Renaming "%s" to "%s"' % (source, target)
        if not os.path.exists(source):
            raise IOError('Source does not exist')
        elif os.path.exists(target):
            raise IOError('Target exists')
        else:
            ox.makedirs(os.path.dirname(target))
            shutil.move(source, target)

    config = get_config()
    volume = config['volumes'].keys()[0]
    volume_path = config['volumes'][volume]
    if not os.path.exists(FILES['organize']):
        sys.exit('%s not found' % FILES['organize'])
    data = ox.jsonc.load(open(FILES['organize']))
    old_empty_directories = get_empty_directories()
    remove = map(lambda x: os.path.join(volume_path, x), data['automatic']['remove'])
    rename = map(lambda x: map(lambda y: os.path.join(volume_path, y), x), data['automatic']['rename'])
    errors = []
    for path in remove:
        try:
            remove_file(path)
        except:
            errors.append('Could not remove "%s"' % path)
    # Renaming happens in two passes, via a temporary '.pandora' suffix, so
    # that rename sets whose source and target paths overlap (swapped or
    # case-only renames, for example) don't clobber each other.
    for paths in rename:
        source = paths[0]
        target = paths[1] + '.pandora'
        try:
            rename_file(source, target)
        except IOError as error:
            errors.append('Could not rename "%s" to "%s" (%s)' % (source, target, error))
    for paths in rename:
        source = paths[1] + '.pandora'
        target = paths[1]
        try:
            rename_file(source, target)
        except IOError as error:
            errors.append('Could not rename "%s" to "%s" (%s)' % (source, target, error))
    while True:
        new_empty_directories = [
            path for path in get_empty_directories() if not path in old_empty_directories
        ]
        if new_empty_directories:
            for path in new_empty_directories:
                try:
                    remove_file(path)
                except:
                    errors.append('Could not remove "%s"' % path)
        else:
            break
    for error in errors:
        print error
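
# sync() compares two of the pandoraclient.files.<volume>.json listings
# written by organize() -- {"directories": [...], "files": [{"hash": ...,
# "path": ..., "size": ..., "time": ...}, ...], "path": ..., "totals": ...}
# -- and writes a list of proposed copy/remove/rename operations.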

def sync():

    def get_directories(files):
        directories = {}
        for file in files:
            # path = os.path.dirname(file['path'])
            path = '/'.join(file['path'].split('/')[:3])
            if not path in directories:
                directories[path] = {'files': []}
            directories[path]['files'].append(file)
        for path in directories:
            directories[path]['hashes'] = '\n'.join(sorted(
                [file['hash'] for file in directories[path]['files']]
            ))
            directories[path]['paths'] = '\n'.join(sorted(
                [file['path'] for file in directories[path]['files']]
            ))
        return directories

    if len(sys.argv) != 3:
        print 'Usage: %s sync targetfiles.json' % sys.argv[0]
        sys.exit(1)
    print 'Reading files'
    config = get_config()
    volume = config['volumes'].keys()[0]
    source_file = FILES['files'] % volume
    target_file = sys.argv[2]
    files = [json.load(open(file))['files'] for file in [source_file, target_file]]
    directories = [get_directories(file) for file in files]
    file_path_by_hash = [{}, {}]
    hashes = [{}, {}]
    paths = [{}, {}]
    for i in [0, 1]:
        for file in files[i]:
            if file['hash'] in file_path_by_hash[i]:
                print 'Duplicate file:'
                print file_path_by_hash[i][file['hash']]
                print file['path']
                sys.exit()
            file_path_by_hash[i][file['hash']] = file['path']
        for path in sorted(directories[i]):
            directory = directories[i][path]
            hashes[i][directory['hashes']] = path
            paths[i][directory['paths']] = path
    print 'Determining files to sync'
    sync = {
        'same directory, different files': [],
        'same file, different filename': [],
        'same files, different filenames': [],
        'unique directory': [],
        'unique file': []
    }
    for path in sorted(directories[0]):
        directory = directories[0][path]
        file_hashes = [
            directory['hashes'].split('\n'),
            directories[1][path]['hashes'].split('\n') if path in directories[1] else []
        ]
        if directory['hashes'] in hashes[1]:
            # same files
            if not directory['paths'] in paths[1]:
                # different filenames
                rename = []
                for file in directory['files']:
                    if file['path'] != file_path_by_hash[1][file['hash']]:
                        rename.append([file['path'], file_path_by_hash[1][file['hash']]])
                sync['same files, different filenames'].append(rename)
            for hash in file_hashes[0]:
                for i in [0, 1]:
                    del file_path_by_hash[i][hash]
        else:
            for hash in file_hashes[0]:
                if not hash in file_hashes[1]:
                    if hash in file_path_by_hash[1]:
                        sync['same file, different filename'].append([
                            file_path_by_hash[0][hash], file_path_by_hash[1][hash]
                        ])
                        del file_path_by_hash[0][hash]
                        del file_path_by_hash[1][hash]
                    else:
                        sync['unique file'].append([file_path_by_hash[0][hash], None])
                        del file_path_by_hash[0][hash]
            for hash in [
                hash for hash in file_hashes[1]
                if hash in file_path_by_hash[1] and not hash in file_hashes[0]
            ]:
                sync['unique file'].append([None, file_path_by_hash[1][hash]])
    '''
    elif path in directories[1] and directory['hashes'] != directories[1][path]['hashes']:
        # same directory, different files
        file_hashes_1 = directories[1][path]['hashes'].split('\n')
        if file_hashes_1[0] in file_path_by_hash[1]:
            # directory in b not handled in previous step
            sync['same directory, different files'].append([
                [file['path'] for file in directory['files']],
                [file['path'] for file in directories[1][path]['files']]
            ])
            for hash in file_hashes:
                del file_path_by_hash[0][hash]
            for hash in file_hashes_1:
                del file_path_by_hash[1][hash]
    elif len(
        [hash for hash in file_hashes if hash in file_path_by_hash[0] and not hash in file_path_by_hash[1]]
    ) == len(file_hashes):
        sync['unique directory'].append([directory['files'], None])
        for hash in file_hashes:
            del file_path_by_hash[0][hash]
    for path in sorted(directories[1]):
        directory = directories[1][path]
        file_hashes = directory['hashes'].split('\n')
        if len(
            [hash for hash in file_hashes if hash in file_path_by_hash[1] and not hash in file_path_by_hash[0]]
        ) == len(file_hashes):
            sync['unique directory'].append([None, directory['paths']])
            for hash in file_hashes:
                del file_path_by_hash[1][hash]
    for hash in file_path_by_hash[0]:
        if hash in file_path_by_hash[1]:
            sync['same file, different filename'].append(
                [file_path_by_hash[0][hash], file_path_by_hash[1][hash]]
            )
    '''
    print 'Writing %s' % FILES['sync']
    data = ['[']
    data.append('')
    for key in [
        'same files, different filenames',
        'same directory, different files',
        'same file, different filename',
        'unique directory',
        'unique file'
    ]:
        data.append('')
        data.append(4 * ' ' + '/* %s */' % key)
        for paths in sync[key]:
            data.append('')
            if key == 'same files, different filenames':
                data.append(4 * ' ' + '/* rename in b */')
                data.append(',\n'.join([4 * ' ' + get_json(['b', 'rename'] + x) for x in paths]) + ',')
                data.append(4 * ' ' + '/* rename in a */')
                data.append(',\n'.join([
                    4 * ' ' + '// ' + get_json(['a', 'rename'] + list(reversed(x))) for x in paths
                ]) + ',')
            elif key == 'same directory, different files':
                data.append(4 * ' ' + '/* remove in b, copy from a to b */')
                data.append(',\n'.join([4 * ' ' + get_json(['b', 'remove', x]) for x in paths[1]]) + ',')
                data.append(',\n'.join([4 * ' ' + get_json(['a', 'copy', x]) for x in paths[0]]) + ',')
                data.append(4 * ' ' + '/* remove in a, copy from b to a */')
                data.append(',\n'.join([4 * ' ' + '// ' + get_json(['a', 'remove', x]) for x in paths[0]]) + ',')
                data.append(',\n'.join([4 * ' ' + '// ' + get_json(['b', 'copy', x]) for x in paths[1]]) + ',')
            elif key == 'same file, different filename':
                data.append(4 * ' ' + '/* rename in b */')
                # print get_json(paths, indent=4).encode('utf-8')
                data.append(4 * ' ' + get_json(['b', 'rename'] + paths) + ',')
                data.append(4 * ' ' + '/* rename in a */')
                data.append(4 * ' ' + '// ' + get_json(['a', 'rename'] + list(reversed(paths))) + ',')
            elif key == 'unique file':
                copy = ['a', 'b'] if paths[0] else ['b', 'a']
                i = 0 if paths[0] else 1
                data.append(4 * ' ' + '/* copy from %s to %s */' % (copy[0], copy[1]))
                data.append(4 * ' ' + get_json([copy[0], 'copy', paths[i]]) + ',')
                data.append(4 * ' ' + '/* remove in %s */' % copy[0])
                data.append(4 * ' ' + '// ' + get_json([copy[0], 'remove', paths[i]]) + ',')
            else:
                # unique directory
                copy = ['a', 'b'] if paths[0] else ['b', 'a']
                i = 0 if paths[0] else 1
                data.append(4 * ' ' + '/* copy from %s to %s */' % (copy[0], copy[1]))
                data.append('\n'.join([4 * ' ' + get_json([copy[0], 'copy', x]) for x in paths[i]]) + ',')
                data.append(4 * ' ' + '/* remove in %s */' % copy[0])
                data.append('\n'.join([4 * ' ' + '// ' + get_json([copy[0], 'remove', x]) for x in paths[i]]) + ',')
    if data[-1].endswith(','):
        # strip the trailing comma after the last entry
        data[-1] = data[-1][:-1]
    data.append('')
    data.append(']')
    write_file(FILES['sync'], u'\n'.join(data))
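
# pandoraclient.sync.jsonc, as written above, is a commented-JSON array of
# operations, with the rejected alternative for each pair commented out --
# along these lines (illustrative paths):
#
# [
#     /* same file, different filename */
#     ["b", "rename", "<path in a>", "<path in b>"],
#     // ["a", "rename", "<path in b>", "<path in a>"]
# ]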

def execute_sync():
    pass

def update():
    pass

def execute_update():
    pass

def get_config():
    if not os.path.exists(FILES['config']):
        sys.exit('%s not found' % FILES['config'])
    with open(FILES['config']) as f:
        config = ox.jsonc.load(f)
    return config

def get_json(data, indent=None, sort_keys=False):
    # return json.dumps(data, ensure_ascii=False, indent=indent, sort_keys=sort_keys)
    return json.dumps(data, indent=indent, sort_keys=sort_keys)

def write_file(filename, data):
    with open(filename, 'w') as f:
        f.write(data.encode('utf-8'))

if __name__ == '__main__':
    actions = ['copy', 'organize', 'sync', 'update']
    action_string = '%s or %s' % (', '.join(actions[:-1]), actions[-1])
    usage = 'usage: %prog action [volume] [options]'
    description = 'Action: %s' % action_string
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-e', '--encode', action='store_true', dest='encode',
        help='encode only, do not upload'
    )
    parser.add_option(
        '-v', '--verbose', action='store_true', dest='verbose',
        help='verbose output'
    )
    parser.add_option(
        '-x', '--execute', action='store_true', dest='execute',
        help='execute %s' % action_string
    )
    (opts, args) = parser.parse_args()
    if len(args) == 0:
        sys.exit(parser.get_usage())
    action = args[0]
    args = args[1:]
    if action == 'copy':
        if not opts.execute:
            copy()
        else:
            execute_copy()
    elif action == 'organize':
        if not opts.execute:
            organize()
        else:
            execute_organize()
    elif action == 'sync':
        if not opts.execute:
            sync()
        else:
            execute_sync()
    elif action == 'update':
        if not opts.execute:
            update()
        else:
            execute_update()
    else:
        sys.exit(parser.get_usage())
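
# Typical workflow (assuming this script is installed as 'pandoraclient' and
# a pandoraclient.config.jsonc exists in the working directory, with at least
# a pan.do/ra API "url" and a "volumes" map of {name: path}):
#
#     pandoraclient organize              # scan, write pandoraclient.organize.jsonc
#     pandoraclient organize -x           # apply the edited organize file
#     pandoraclient sync targetfiles.json # diff against another volume's listing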