commit b923b176fdb2a38fa77f89ea6490dc207bdcf647
Author: rolux
Date:   Tue Sep 11 18:00:56 2012 +0200

    initial commit

diff --git a/pandoraclient b/pandoraclient
new file mode 100644
index 0000000..985d497
--- /dev/null
+++ b/pandoraclient
@@ -0,0 +1,550 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+# GPL 2012
+from __future__ import division, with_statement
+import datetime
+import json
+from optparse import OptionParser
+import os
+import ox
+import re
+import shutil
+import sys
+import unicodedata
+
+
+FILES = {
+    'config': 'pandoraclient.config.jsonc',
+    'errors': 'pandoraclient.errors.json',
+    'files': 'pandoraclient.files.%s.json',
+    'organize': 'pandoraclient.organize.jsonc',
+    'sync': 'pandoraclient.sync.jsonc'
+}
+
+
+def copy():
+
+    pass
+
+
+def execute_copy():
+
+    pass
+
+
+def organize():
+
+    def add_directory(path):
+        d = directories[-1]
+        for i, directory in enumerate(directories):
+            if ox.sort_string(path).lower() < ox.sort_string(directory).lower():
+                d = directories[max(i - 1, 0)]
+                break
+        return '%s/%s' % (d, path)
+
+    def remove_directory(path):
+        return '/'.join(path.split('/')[1:])
+
+    def is_system_file(filename):
+        return re.search('^\.(_|DS_Store$)', filename) != None
+
+    config = get_config()
+    volume = config['volumes'].keys()[0]
+    volume_path = config['volumes'][volume]
+    FILES['files'] = FILES['files'] % volume
+    if isinstance(volume_path, str):
+        volume_path = volume_path.decode('utf-8')
+    if volume_path[-1] != '/':
+        volume_path += '/'
+    cache = {}
+    if os.path.exists(FILES['files']):
+        data = json.load(open(FILES['files']))
+        if data['path'] == volume_path:
+            for file in data['files']:
+                cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash']
+    directories = []
+    files = []
+    files_by_hash = {}
+    files_by_item = {}
+    issues = {
+        'duplicate files': [],
+        'empty directories': [],
+        'incorrect filenames': [],
+        'incorrect filenames (target exists)': [],
+        'missing subtitles': [],
+        'missing video': [],
+        'multiple versions': [],
+        'non-canonical filenames': [],
+        'non-canonical filenames (target exists)': [],
+        'system files': [],
+        'unexpected files': [],
+        'unknown extensions': []
+    }
+    previous_path = ''
+
+    print 'Scanning %s' % volume_path
+    directories = sorted(os.listdir(volume_path))
+    for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
+        absolute_path = unicodedata.normalize('NFD', absolute_path)
+        relative_path = absolute_path[len(volume_path):]
+        for filename in ox.sorted_strings(filenames):
+            filename = unicodedata.normalize('NFD', filename)
+            if relative_path != previous_path and len(relative_path.split('/')) == 3:
+                print relative_path
+                previous_path = relative_path
+            full_path = os.path.join(absolute_path, filename)
+            path = os.path.join(relative_path, filename)
+            if is_system_file(filename):
+                issues['system files'].append(path)
+            else:
+                file = ox.movie.parse_path(remove_directory(path))
+                file['normalizedPath'] = add_directory(file['normalizedPath'])
+                file['path'] = path
+                file['size'] = os.path.getsize(full_path)
+                file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
+                key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
+                file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
+                files.append(file)
+                if not file['hash'] in files_by_hash:
+                    files_by_hash[file['hash']] = []
+                files_by_hash[file['hash']].append(file)
+                # unexpected files
+                length = len(path.split('/'))
+                if length < 4:
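+                    # paths are expected to be four components deep, presumably
+                    # <group>/<director>/<item>/<file> (the exact layout is an
+                    # assumption; only the depth is checked here)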
+                    issues['unexpected files'].append(path)
+                elif length == 4:
+                    # add parsed title to item key so that episodes are their own item
+                    key = '%s/%s' % ('/'.join(file['normalizedPath'].split('/')[:-1]), file['title'])
+                    if not key in files_by_item:
+                        files_by_item[key] = []
+                    files_by_item[key].append(file)
+        if not dirnames and (not filenames or not filter(lambda x: not is_system_file(x), filenames)):
+            issues['empty directories'].append(relative_path)
+
+    print 'Scanning for duplicates'
+    for hash in files_by_hash:
+        if len(files_by_hash[hash]) > 1:
+            issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
+
+    print 'Scanning for inconsistencies'
+    for key in sorted(files_by_item.keys()):
+        item_files = files_by_item[key]
+        # unknown extensions
+        issues['unknown extensions'] += [file['path'] for file in item_files if not file['type']]
+        # missing video, missing subtitles, multiple versions
+        versions = ox.movie.parse_item_files(item_files)
+        main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
+        other_files = sum([version['files'] for version in versions if not version['isMainVersion']], [])
+        if not main_files:
+            issues['missing video'].append([file['path'] for file in item_files])
+        else:
+            video_files = [file for file in main_files if file['type'] == 'video']
+            subtitle_files = [file for file in main_files if file['type'] == 'subtitle']
+            if subtitle_files and len(subtitle_files) < len(video_files):
+                issues['missing subtitles'].append([file['path'] for file in main_files])
+            if other_files:
+                issues['multiple versions'].append([file['path'] for file in main_files + other_files])
+        # non-canonical filenames
+        for version in versions:
+            rename = [[
+                file['path'], file['normalizedPath']
+            ] for file in version['files'] if file['path'] != file['normalizedPath']]
+            languages = {'idx': [], 'srt': [], 'sub': []}
+            for extension in languages:
+                languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension])
+                if len(languages[extension]) == 1 and 'en' in languages[extension]:
+                    # only english subtitles
+                    regexp = '\.en(?=\.%s$)' % extension
+                    # don't add '.en'
+                    rename = [paths for paths in rename if not re.search(regexp, paths[1])]
+                    # remove '.en'
+                    rename += [[
+                        file['path'], re.sub(regexp, '', file['path'])
+                    ] for file in version['files'] if re.search(regexp, file['path'])]
+            if rename:
+                issues['non-canonical filenames'] += rename
+
+    print 'Scanning for conflicts'
+    existing_paths = [file['path'] for file in files]
+    for key in ['incorrect filenames', 'non-canonical filenames']:
+        key_exists = '%s (target exists)' % key
+        new_paths = [paths[1] for paths in issues[key]]
+        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
+            if new_paths.count(paths[1]) > 1:
+                # multiple files with the same normalized path
+                issues[key].remove(paths)
+                if not paths in issues[key_exists]:
+                    issues[key_exists].append(paths)
+            elif not paths[1] in existing_paths:
+                # normalized path does not exist, remove original path from existing paths
+                existing_paths.remove(paths[0])
+        for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
+            if paths[1] in existing_paths:
+                # normalized path exists
+                issues[key].remove(paths)
+                if not paths in issues[key_exists]:
+                    issues[key_exists].append(paths)
+
+    for issue in issues:
+        if issues[issue]:
+            if isinstance(issues[issue][0], basestring):
+                issues[issue] = sorted(issues[issue])
+            else:
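+                # entries are lists ([source, target] pairs or groups of
+                # paths), so sort them by their first element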
+                issues[issue] = sorted(issues[issue], key=lambda x: x[0])
+    keys = {
+        'automatic': {
+            'remove': [
+                'empty directories', 'system files',
+                'unexpected files', 'unknown extensions'
+            ],
+            'rename': [
+                'incorrect filenames',
+                'non-canonical filenames'
+            ]
+        },
+        'manual': {
+            'rename': [
+                'incorrect filenames (target exists)',
+                'non-canonical filenames (target exists)'
+            ],
+            'resolve': [
+                'duplicate files', 'missing subtitles',
+                'missing video', 'multiple versions'
+            ]
+        }
+    }
+
+    print 'Writing %s' % FILES['files']
+    data = ['{']
+    data.append(4 * ' ' + '"directories": ' + json.dumps(directories) + ',')
+    data.append(4 * ' ' + '"files": [')
+    for f, file in enumerate(files):
+        data.append(8 * ' ' + json.dumps({
+            'hash': file['hash'],
+            'path': file['path'],
+            'size': file['size'],
+            'time': file['time']
+        }, sort_keys=True) + (',' if f < len(files) - 1 else ''))
+    data.append(4 * ' ' + '],')
+    data.append(4 * ' ' + '"path": ' + json.dumps(volume_path) + ',')
+    data.append(4 * ' ' + '"totals": {"files": %d, "size": %d' % (
+        len(files), sum([file['size'] for file in files])
+    ) + '}')
+    data.append('}')
+    with open(FILES['files'], 'w') as f:
+        f.write('\n'.join(data))
+
+    print 'Writing %s' % FILES['organize']
+    data = ['{']
+    sections = sorted(keys.keys())
+    for s, section in enumerate(sections):
+        data.append('')
+        data.append(4 * ' ' + '"%s": {' % section)
+        actions = sorted(keys[section].keys())
+        for a, action in enumerate(actions):
+            data.append('')
+            data.append(8 * ' ' + '"%s": [' % action)
+            issue_keys = keys[section][action]
+            for i, issue in enumerate(issue_keys):
+                data.append('')
+                data.append(12 * ' ' + '/* %s */' % issue)
+                if issues[issue]:
+                    for line in json.dumps(issues[issue], indent=4).split('\n')[1:-1]:
+                        data.append(8 * ' ' + line)
+                    if i < len(issue_keys) - 1:
+                        data[-1] += ','
+            data.append('')
+            data.append(8 * ' ' + ']' + (',' if a < len(actions) - 1 else ''))
+        data.append('')
+        data.append(4 * ' ' + '}' + (',' if s < len(sections) - 1 else ''))
+    data.append('}')
+    with open(FILES['organize'], 'w') as f:
+        f.write('\n'.join(data))
+    print 'Next, edit %s and run pandoraclient organize -x' % FILES['organize']
+
+
+def execute_organize():
+
+    def rename_file(source, target):
+        if not os.path.exists(source):
+            raise IOError('Source does not exist')
+        elif os.path.exists(target):
+            raise IOError('Target exists')
+        else:
+            ox.makedirs(os.path.dirname(target))
+            shutil.move(source, target)
+
+    config = get_config()
+    volume = config['volumes'].keys()[0]
+    volume_path = config['volumes'][volume]
+
+    if not os.path.exists(FILES['organize']):
+        sys.exit('%s not found' % FILES['organize'])
+
+    data = ox.jsonc.load(open(FILES['organize']))
+    remove = map(lambda x: os.path.join(volume_path, x), data['automatic']['remove'])
+    rename = map(lambda x: map(lambda y: os.path.join(volume_path, y), x), data['automatic']['rename'])
+    for path in remove:
+        try:
+            if os.path.isdir(path):
+                os.rmdir(path)
+            else:
+                os.remove(path)
+        except OSError:
+            print "Could not remove '%s'" % (path)
+    for paths in rename:
+        source = paths[0]
+        target = paths[1] + '.pandora'
+        try:
+            rename_file(source, target)
+        except IOError as error:
+            print "Could not rename '%s' to '%s' (%s)" % (source, target, error)
+    for paths in rename:
+        source = paths[1] + '.pandora'
+        target = paths[1]
+        try:
+            rename_file(source, target)
+        except IOError as error:
+            print "Could not rename '%s' to '%s' (%s)" % (source, target, error)
+
+
+def sync():
+
+    def get_directories(files):
+        directories = {}
+        for file in files:
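+            # group files by item directory, i.e. the first three path
+            # components (matching the four-component layout that organize()
+            # checks above)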
+            # path = os.path.dirname(file['path'])
+            path = '/'.join(file['path'].split('/')[:3])
+            if not path in directories:
+                directories[path] = {'files': []}
+            directories[path]['files'].append(file)
+        for path in directories:
+            directories[path]['hashes'] = '\n'.join(sorted(
+                [file['hash'] for file in directories[path]['files']]
+            ))
+            directories[path]['paths'] = '\n'.join(sorted(
+                [file['path'] for file in directories[path]['files']]
+            ))
+        return directories
+
+    if len(sys.argv) != 3:
+        print 'Usage: %s sync targetfiles.json' % sys.argv[0]
+        sys.exit(1)
+
+    print 'Reading files'
+    source_file = FILES['files']
+    target_file = sys.argv[2]
+    files = [json.load(open(file))['files'] for file in [source_file, target_file]]
+    directories = [get_directories(file) for file in files]
+    file_path_by_hash = [{}, {}]
+    hashes = [{}, {}]
+    paths = [{}, {}]
+    for i in [0, 1]:
+        for file in files[i]:
+            if file['hash'] in file_path_by_hash[i]:
+                print 'Duplicate file:'
+                print file_path_by_hash[i][file['hash']]
+                print file['path']
+                sys.exit()
+            file_path_by_hash[i][file['hash']] = file['path']
+        for path in sorted(directories[i].keys()):
+            directory = directories[i][path]
+            hashes[i][directory['hashes']] = path
+            paths[i][directory['paths']] = path
+
+    print 'Determining files to sync'
+    sync = {
+        'same files, different filenames': [],
+        'same file, different filename': [],
+        'unique directory': [],
+        'unique file': []
+    }
+    for path in sorted(directories[0].keys()):
+        directory = directories[0][path]
+        file_hashes = [
+            directory['hashes'].split('\n'),
+            directories[1][path]['hashes'].split('\n') if path in directories[1] else []
+        ]
+        if directory['hashes'] in hashes[1]:
+            # same files
+            if not directory['paths'] in paths[1]:
+                # different filenames
+                rename = []
+                for file in directory['files']:
+                    if file['path'] != file_path_by_hash[1][file['hash']]:
+                        rename.append([file['path'], file_path_by_hash[1][file['hash']]])
+                sync['same files, different filenames'].append(rename)
+            for hash in file_hashes[0]:
+                for i in [0, 1]:
+                    del file_path_by_hash[i][hash]
+        else:
+            for hash in file_hashes[0]:
+                if not hash in file_hashes[1]:
+                    if hash in file_path_by_hash[1]:
+                        sync['same file, different filename'].append([
+                            file_path_by_hash[0][hash],
+                            file_path_by_hash[1][hash]
+                        ])
+                        del file_path_by_hash[0][hash]
+                        del file_path_by_hash[1][hash]
+                    else:
+                        sync['unique file'].append([
+                            file_path_by_hash[0][hash],
+                            None
+                        ])
+                        del file_path_by_hash[0][hash]
+            for hash in [hash for hash in file_hashes[1] if hash in file_path_by_hash[1]]:
+                sync['unique file'].append([
+                    None,
+                    file_path_by_hash[1][hash]
+                ])
+        '''
+        elif path in directories[1] and directory['hashes'] != directories[1][path]['hashes']:
+            # same directory, different files
+            file_hashes_1 = directories[1][path]['hashes'].split('\n')
+            if file_hashes_1[0] in file_path_by_hash[1]:
+                # directory in b not handled in previous step
+                sync['same directory, different files'].append([
+                    [file['path'] for file in directory['files']],
+                    [file['path'] for file in directories[1][path]['files']]
+                ])
+                for hash in file_hashes:
+                    del file_path_by_hash[0][hash]
+                for hash in file_hashes_1:
+                    del file_path_by_hash[1][hash]
+        elif len(
+            [hash for hash in file_hashes if hash in file_path_by_hash[0] and not hash in file_path_by_hash[1]]
+        ) == len(file_hashes):
+            sync['unique directory'].append([directory['files'], None])
+            for hash in file_hashes:
+                del file_path_by_hash[0][hash]
+    for path in sorted(directories[1].keys()):
+        directory = directories[1][path]
+        file_hashes = directory['hashes'].split('\n')
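+        # note: everything between the opening ''' above and the closing '''
+        # below is commented out, so 'same directory, different files' and
+        # 'unique directory' are never actually populated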
+        if len(
+            [hash for hash in file_hashes if hash in file_path_by_hash[1] and not hash in file_path_by_hash[0]]
+        ) == len(file_hashes):
+            sync['unique directory'].append([None, directory['paths']])
+            for hash in file_hashes:
+                del file_path_by_hash[1][hash]
+    for hash in file_path_by_hash[0]:
+        if hash in file_path_by_hash[1]:
+            sync['same file, different filenames'].append(
+                [file_path_by_hash[0][hash], file_path_by_hash[1][hash]]
+            )
+    '''
+
+    print 'Writing %s' % FILES['sync']
+    data = ['[']
+    data.append('')
+    for key in [
+        'same files, different filenames',
+        'same directory, different files',
+        'same file, different directory',
+        'unique directory'
+    ]:
+        data.append('')
+        data.append(4 * ' ' + '/* %s */' % key)
+        for paths in sync.get(key, []):
+            data.append('')
+            if key == 'same files, different filenames':
+                data.append(4 * ' ' + '/* rename in b */')
+                data.append(',\n'.join([4 * ' ' + json.dumps(['b', 'rename'] + x) for x in paths]) + ',')
+                data.append(4 * ' ' + '/* rename in a */')
+                data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['a', 'rename'] + list(reversed(x))) for x in paths]) + ',')
+            elif key == 'same directory, different files':
+                data.append(4 * ' ' + '/* remove in b, copy from a to b */')
+                data.append(',\n'.join([4 * ' ' + json.dumps(['b', 'remove', x]) for x in paths[1]]) + ',')
+                data.append(',\n'.join([4 * ' ' + json.dumps(['a', 'copy', x]) for x in paths[0]]) + ',')
+                data.append(4 * ' ' + '/* remove in a, copy from b to a */')
+                data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['a', 'remove', x]) for x in paths[0]]) + ',')
+                data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['b', 'copy', x]) for x in paths[1]]) + ',')
+            elif key == 'same file, different directory':
+                data.append(4 * ' ' + '/* rename in b */')
+                data.append(4 * ' ' + json.dumps(['b', 'rename'] + paths) + ',')
+                data.append(4 * ' ' + '/* rename in a */')
+                data.append(4 * ' ' + '// ' + json.dumps(['a', 'rename'] + list(reversed(paths))) + ',')
+            else:
+                copy = ['a', 'b'] if paths[0] else ['b', 'a']
+                i = 0 if paths[0] else 1
+                data.append(4 * ' ' + '/* copy from %s to %s */' % (copy[0], copy[1]))
+                data.append('\n'.join([4 * ' ' + json.dumps([copy[0], 'copy', x]) for x in paths[i]]) + ',')
+                data.append(4 * ' ' + '/* remove in %s */' % copy[0])
+                data.append('\n'.join([4 * ' ' + '// ' + json.dumps([copy[0], 'remove', x]) for x in paths[i]]) + ',')
+    data[-1] = data[-1][:-1]
+    data.append('')
+    data.append(']')
+    with open(FILES['sync'], 'w') as f:
+        f.write('\n'.join(data))
+
+
+def execute_sync():
+
+    pass
+
+
+def update():
+
+    pass
+
+
+def execute_update():
+
+    pass
+
+
+def get_config():
+    if not os.path.exists(FILES['config']):
+        sys.exit('%s not found' % FILES['config'])
+    with open(FILES['config']) as f:
+        config = ox.jsonc.load(f)
+    return config
+
+
+if __name__ == '__main__':
+
+    actions = ['copy', 'organize', 'sync', 'update']
+    action_string = '%s or %s' % (', '.join(actions[:-1]), actions[-1])
+    usage = 'usage: %prog action [volume] [options]'
+    description = 'Action: %s' % action_string
+    parser = OptionParser(usage=usage, description=description)
+    parser.add_option(
+        '-e', '--encode', action='store_true', dest='encode',
+        help='encode only, do not upload'
+    )
+    parser.add_option(
+        '-v', '--verbose', action='store_true', dest='verbose',
+        help='verbose output'
+    )
+    parser.add_option(
+        '-x', '--execute', action='store_true', dest='execute',
+        help='execute %s' % action_string
+    )
+    (opts, args) = parser.parse_args()
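+    # typical invocations (a dry run writes a report file, -x executes it), e.g.:
+    #     pandoraclient organize
+    #     pandoraclient organize -x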
+    if len(args) == 0:
+        sys.exit(parser.get_usage())
+    action = args[0]
+    args = args[1:]
+    if action == 'copy':
+        if not opts.execute:
+            copy()
+        else:
+            execute_copy()
+    elif action == 'organize':
+        if not opts.execute:
+            organize()
+        else:
+            execute_organize()
+    elif action == 'sync':
+        if not opts.execute:
+            sync()
+        else:
+            execute_sync()
+    elif action == 'update':
+        if not opts.execute:
+            update()
+        else:
+            execute_update()
diff --git a/pandoraclient.config.jsonc b/pandoraclient.config.jsonc
new file mode 100644
index 0000000..bcb048e
--- /dev/null
+++ b/pandoraclient.config.jsonc
@@ -0,0 +1,11 @@
+{
+    "cache": "~/.ox/pandoraclient/cache.sqlite",
+    "media": "~/.ox/pandoraclient/media/",
+    "password": "",
+    "subdirectories": true,
+    "url": "https://0xdb.org/api/",
+    "username": "",
+    "volumes": {
+        "cinema": "/Users/rolux/Movies/Cinema/"
+    }
+}
\ No newline at end of file