initial commit

Author: rolux, 2012-09-11 18:00:56 +02:00
Commit: b923b176fd
2 changed files with 561 additions and 0 deletions

pandoraclient (new file, 550 lines)
#!/usr/bin/python
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2012
from __future__ import division, with_statement
import datetime
import json
from optparse import OptionParser
import os
import ox
import re
import shutil
import sys
import unicodedata
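# files written and read by the client; 'files' is per volume, and the
# '.jsonc' files contain /* comments */ and are meant to be edited by
# hand before re-running the action with -x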
FILES = {
'config': 'pandoraclient.config.jsonc',
'errors': 'pandoraclient.errors.json',
'files': 'pandoraclient.files.%s.json',
'organize': 'pandoraclient.organize.jsonc',
'sync': 'pandoraclient.sync.jsonc'
}
def copy():
pass
def execute_copy():
pass
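# scan the first configured volume, hash every file, collect issues,
# then write FILES['files'] and an editable FILES['organize']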
def organize():
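# prefix a path with the last top-level directory that does not sort
# after it (the volume root presumably holds alphabetical bins)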
def add_directory(path):
d = directories[-1]
for i, directory in enumerate(directories):
if ox.sort_string(path).lower() < ox.sort_string(directory).lower():
d = directories[max(i - 1, 0)]
break
return '%s/%s' % (d, path)
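# strip the top-level directory from a path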
def remove_directory(path):
return '/'.join(path.split('/')[1:])
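# Mac OS X artifacts: '.DS_Store' and AppleDouble '._*' files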
def is_system_file(filename):
return re.search(r'^\.(_|DS_Store$)', filename) is not None
config = get_config()
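# only the first configured volume is processed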
volume = config['volumes'].keys()[0]
volume_path = config['volumes'][volume]
FILES['files'] = FILES['files'] % volume
if isinstance(volume_path, str):
volume_path = volume_path.decode('utf-8')
if volume_path[-1] != '/':
volume_path += '/'
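# cache maps 'path size time' to hash, loaded from a previous run so
# unchanged files are not hashed again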
cache = {}
if os.path.exists(FILES['files']):
data = json.load(open(FILES['files']))
if data['path'] == volume_path:
for file in data['files']:
cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash']
directories = []
files = []
files_by_hash = {}
files_by_item = {}
issues = {
'duplicate files': [],
'empty directories': [],
'incorrect filenames': [],
'incorrect filenames (target exists)': [],
'missing subtitles': [],
'missing video': [],
'multiple versions': [],
'non-canonical filenames': [],
'non-canonical filenames (target exists)': [],
'system files': [],
'unexpected files': [],
'unknown extensions': []
}
previous_path = ''
print 'Scanning %s' % volume_path
directories = sorted(os.listdir(volume_path))
for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
absolute_path = unicodedata.normalize('NFD', absolute_path)
relative_path = absolute_path[len(volume_path):]
for filename in ox.sorted_strings(filenames):
filename = unicodedata.normalize('NFD', filename)
if relative_path != previous_path and len(relative_path.split('/')) == 3:
print relative_path
previous_path = relative_path
full_path = os.path.join(absolute_path, filename)
path = os.path.join(relative_path, filename)
if is_system_file(filename):
issues['system files'].append(path)
else:
file = ox.movie.parse_path(remove_directory(path))
file['normalizedPath'] = add_directory(file['normalizedPath'])
file['path'] = path
file['size'] = os.path.getsize(full_path)
file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
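# reuse the cached hash if path, size and mtime are unchanged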
key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
files.append(file)
if not file['hash'] in files_by_hash:
files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
# unexpected files
length = len(path.split('/'))
if length < 4:
issues['unexpected files'].append(path)
elif length == 4:
# add parsed title to item key so that episodes are their own item
key = '%s/%s' % ('/'.join(file['normalizedPath'].split('/')[:-1]), file['title'])
if not key in files_by_item:
files_by_item[key] = []
files_by_item[key].append(file)
if not dirnames and (not filenames or not filter(lambda x: not is_system_file(x), filenames)):
issues['empty directories'].append(relative_path)
print 'Scanning for duplicates'
for hash in files_by_hash:
if len(files_by_hash[hash]) > 1:
issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
print 'Scanning for inconsistencies'
for key in sorted(files_by_item.keys()):
item_files = files_by_item[key]
# unknown extensions
issues['unknown extensions'] += [file['path'] for file in item_files if not file['type']]
# missing video, missing subtitles, multiple versions
versions = ox.movie.parse_item_files(item_files)
main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
other_files = sum([version['files'] for version in versions if not version['isMainVersion']], [])
if not main_files:
issues['missing video'].append([file['path'] for file in item_files])
else:
video_files = [file for file in main_files if file['type'] == 'video']
subtitle_files = [file for file in main_files if file['type'] == 'subtitle']
if subtitle_files and len(subtitle_files) < len(video_files):
issues['missing subtitles'].append([file['path'] for file in main_files])
if other_files:
issues['multiple versions'].append([file['path'] for file in main_files + other_files])
# non-canonical filenames
for version in versions:
rename = [[
file['path'], file['normalizedPath']
] for file in version['files'] if file['path'] != file['normalizedPath']]
languages = {'idx': [], 'srt': [], 'sub': []}
for extension in languages:
languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension])
if len(languages[extension]) == 1 and 'en' in languages[extension]:
# only english subtitles
regexp = r'\.en(?=\.%s$)' % extension
# don't add '.en'
rename = [paths for paths in rename if not re.search(regexp, paths[1])]
# remove '.en'
rename += [[
file['path'], re.sub(regexp, '', file['path'])
] for file in version['files'] if re.search(regexp, file['path'])]
if rename:
issues['non-canonical filenames'] += rename
print 'Scanning for conflicts'
existing_paths = [file['path'] for file in files]
for key in ['incorrect filenames', 'non-canonical filenames']:
key_exists = '%s (target exists)' % key
new_paths = [paths[1] for paths in issues[key]]
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if new_paths.count(paths[1]) > 1:
# multiple files with the same normalized path
issues[key].remove(paths)
if not paths in issues[key_exists]:
issues[key_exists].append(paths)
elif not paths[1] in existing_paths:
# normalized path does not exist, remove original path from existing paths
existing_paths.remove(paths[0])
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if paths[1] in existing_paths:
# normalized path exists
issues[key].remove(paths)
if not paths in issues[key_exists]:
issues[key_exists].append(paths)
for issue in issues:
if issues[issue]:
if isinstance(issues[issue][0], basestring):
issues[issue] = sorted(issues[issue])
else:
issues[issue] = sorted(issues[issue], key=lambda x: x[0])
keys = {
'automatic': {
'remove': [
'empty directories', 'system files',
'unexpected files', 'unknown extensions'
],
'rename': [
'incorrect filenames',
'non-canonical filenames'
]
},
'manual': {
'rename': [
'incorrect filenames (target exists)',
'non-canonical filenames (target exists)'
],
'resolve': [
'duplicate files', 'missing subtitles',
'missing video', 'multiple versions'
]
}
}
print 'Writing %s' % FILES['files']
data = ['{']
data.append(4 * ' ' + '"directories": ' + json.dumps(directories) + ',')
data.append(4 * ' ' + '"files": [')
for f, file in enumerate(files):
data.append(8 * ' ' + json.dumps({
'hash': file['hash'],
'path': file['path'],
'size': file['size'],
'time': file['time']
}, sort_keys=True) + (',' if f < len(files) - 1 else ''))
data.append(4 * ' ' + '],')
data.append(4 * ' ' + '"path": ' + json.dumps(volume_path) + ',')
data.append(4 * ' ' + '"totals": {"files": %d, "size": %d' % (
len(files), sum([file['size'] for file in files])
) + '}')
data.append('}')
with open(FILES['files'], 'w') as f:
f.write('\n'.join(data))
print 'Writing %s' % FILES['organize']
data = ['{']
sections = sorted(keys.keys())
for s, section in enumerate(sections):
data.append('')
data.append(4 * ' ' + '"%s": {' % section)
actions = sorted(keys[section].keys())
for a, action in enumerate(actions):
data.append('')
data.append(8 * ' ' + '"%s": [' % action)
issue_keys = keys[section][action]
for i, issue in enumerate(issue_keys):
data.append('')
data.append(12 * ' ' + '/* %s */' % issue)
if issues[issue]:
for line in json.dumps(issues[issue], indent=4).split('\n')[1:-1]:
data.append(8 * ' ' + line)
if i < len(issue_keys) - 1:
data[-1] += ','
data.append('')
data.append(8 * ' ' + ']' + (',' if a < len(actions) - 1 else ''))
data.append('')
data.append(4 * ' ' + '}' + (',' if s < len(sections) - 1 else ''))
data.append('}')
with open(FILES['organize'], 'w') as f:
f.write('\n'.join(data))
print 'Next, edit %s and run pandoraclient organize -x' % FILES['organize']
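# apply the edited FILES['organize']: remove files and directories, then
# rename in two passes via a temporary '.pandora' suffix, presumably so
# that overlapping or case-only renames cannot clobber each other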
def execute_organize():
def rename_file(source, target):
if not os.path.exists(source):
raise IOError('Source does not exist')
elif os.path.exists(target):
raise IOError('Target exists')
else:
ox.makedirs(os.path.dirname(target))
shutil.move(source, target)
config = get_config()
volume = config['volumes'].keys()[0]
volume_path = config['volumes'][volume]
if not os.path.exists(FILES['organize']):
sys.exit('%s not found' % FILES['organize'])
data = ox.jsonc.load(open(FILES['organize']))
remove = map(lambda x: os.path.join(volume_path, x), data['automatic']['remove'])
rename = map(lambda x: map(lambda y: os.path.join(volume_path, y), x), data['automatic']['rename'])
for path in remove:
try:
if os.path.isdir(path):
os.rmdir(path)
else:
os.remove(path)
except OSError as error:
print "Could not remove '%s' (%s)" % (path, error)
for paths in rename:
source = paths[0]
target = paths[1] + '.pandora'
try:
rename_file(source, target)
except IOError as error:
print "Could not rename '%s' to '%s' (%s)" % (source, target, error)
for paths in rename:
source = paths[1] + '.pandora'
target = paths[1]
try:
rename_file(source, target)
except IOError as error:
print "Could not rename '%s' to '%s' (%s)" % (source, target, error)
def sync():
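# group files by their first three path components; per directory, build
# sorted newline-joined hash and path signatures for quick comparison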
def get_directories(files):
directories = {}
for file in files:
# path = os.path.dirname(file['path'])
path = '/'.join(file['path'].split('/')[:3])
if not path in directories:
directories[path] = {'files': []}
directories[path]['files'].append(file)
for path in directories:
directories[path]['hashes'] = '\n'.join(sorted(
[file['hash'] for file in directories[path]['files']]
))
directories[path]['paths'] = '\n'.join(sorted(
[file['path'] for file in directories[path]['files']]
))
return directories
if len(sys.argv) != 3:
print 'Usage: %s sync targetfiles.json' % sys.argv[0]
sys.exit(1)
print 'Reading files'
source_file = FILES['files']
target_file = sys.argv[2]
files = [json.load(open(file))['files'] for file in [source_file, target_file]]
directories = [get_directories(file) for file in files]
file_path_by_hash = [{}, {}]
hashes = [{}, {}]
paths = [{}, {}]
for i in [0, 1]:
for file in files[i]:
if file['hash'] in file_path_by_hash[i]:
print 'Duplicate file:'
print file_path_by_hash[i][file['hash']]
print file['path']
sys.exit()
file_path_by_hash[i][file['hash']] = file['path']
for path in sorted(directories[i].keys()):
directory = directories[i][path]
hashes[i][directory['hashes']] = path
paths[i][directory['paths']] = path
print 'Determining files to sync'
sync = {
'same files, different filenames': [],
'same file, different filename': [],
'unique directory': [],
'unique file': []
}
for path in sorted(directories[0].keys()):
directory = directories[0][path]
file_hashes = [
directory['hashes'].split('\n'),
directories[1][path]['hashes'].split('\n') if path in directories[1] else []
]
if directory['hashes'] in hashes[1]:
# same files
if not directory['paths'] in paths[1]:
# different filenames
rename = []
for file in directory['files']:
if file['path'] != file_path_by_hash[1][file['hash']]:
rename.append([file['path'], file_path_by_hash[1][file['hash']]])
sync['same files, different filenames'].append(rename)
for hash in file_hashes[0]:
for i in [0, 1]:
del file_path_by_hash[i][hash]
else:
for hash in file_hashes[0]:
if not hash in file_hashes[1]:
if hash in file_path_by_hash[1]:
sync['same file, different filename'].append([
file_path_by_hash[0][hash],
file_path_by_hash[1][hash]
])
del file_path_by_hash[0][hash]
del file_path_by_hash[1][hash]
else:
sync['unique file'].append([
file_path_by_hash[0][hash],
None
])
del file_path_by_hash[0][hash]
for hash in [hash for hash in file_hashes[1] if hash in file_path_by_hash[1]]:
sync['unique file'].append([
None,
file_path_by_hash[1][hash]
])
'''
elif path in directories[1] and directory['hashes'] != directories[1][path]['hashes']:
# same directory, different files
file_hashes_1 = directories[1][path]['hashes'].split('\n')
if file_hashes_1[0] in file_path_by_hash[1]:
# directory in b not handled in previous step
sync['same directory, different files'].append([
[file['path'] for file in directory['files']],
[file['path'] for file in directories[1][path]['files']]
])
for hash in file_hashes:
del file_path_by_hash[0][hash]
for hash in file_hashes_1:
del file_path_by_hash[1][hash]
elif len(
[hash for hash in file_hashes if hash in file_path_by_hash[0] and not hash in file_path_by_hash[1]]
) == len(file_hashes):
sync['unique directory'].append([directory['files'], None])
for hash in file_hashes:
del file_path_by_hash[0][hash]
for path in sorted(directories[1].keys()):
directory = directories[1][path]
file_hashes = directory['hashes'].split('\n')
if len(
[hash for hash in file_hashes if hash in file_path_by_hash[1] and not hash in file_path_by_hash[0]]
) == len(file_hashes):
sync['unique directory'].append([None, directory['paths']])
for hash in file_hashes:
del file_path_by_hash[1][hash]
for hash in file_path_by_hash[0]:
if hash in file_path_by_hash[1]:
sync['same file, different filenames'].append(
[file_path_by_hash[0][hash], file_path_by_hash[1][hash]]
)
'''
print 'Writing %s' % FILES['sync']
data = ['[']
data.append('')
for key in [
'same files, different filenames',
'same directory, different files',
'same file, different directory',
'unique directory'
]:
# some of these categories are only produced by the disabled code above
if not key in sync:
continue
data.append('')
data.append(4 * ' ' + '/* %s */' % key)
for paths in sync[key]:
data.append('')
if key == 'same files, different filenames':
data.append(4 * ' ' + '/* rename in b */')
data.append(',\n'.join([4 * ' ' + json.dumps(['b', 'rename'] + x) for x in paths]) + ',')
data.append(4 * ' ' + '/* rename in a */')
data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['a', 'rename'] + list(reversed(x))) for x in paths]) + ',')
elif key == 'same directory, different files':
data.append(4 * ' ' + '/* remove in b, copy from a to b */')
data.append(',\n'.join([4 * ' ' + json.dumps(['b', 'remove', x]) for x in paths[1]]) + ',')
data.append(',\n'.join([4 * ' ' + json.dumps(['a', 'copy', x]) for x in paths[0]]) + ',')
data.append(4 * ' ' + '/* remove in a, copy from b to a */')
data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['a', 'remove', x]) for x in paths[0]]) + ',')
data.append(',\n'.join([4 * ' ' + '// ' + json.dumps(['b', 'copy', x]) for x in paths[1]]) + ',')
elif key == 'same file, different directory':
data.append(4 * ' ' + '/* rename in b */')
data.append(4 * ' ' + json.dumps(['b', 'rename'] + paths) + ',')
data.append(4 * ' ' + '/* rename in a */')
data.append(4 * ' ' + '// ' + json.dumps(['a', 'rename'] + list(reversed(paths))) + ',')
else:
copy = ['a', 'b'] if paths[0] else ['b', 'a']
i = 0 if paths[0] else 1
data.append(4 * ' ' + '/* copy from %s to %s */' % (copy[0], copy[1]))
data.append('\n'.join([4 * ' ' + json.dumps([copy[0], 'copy', x]) for x in paths[i]]) + ',')
data.append(4 * ' ' + '/* remove in %s */' % copy[0])
data.append('\n'.join([4 * ' ' + '// ' + json.dumps([copy[0], 'remove', x]) for x in paths[i]]) + ',')
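# strip the trailing comma from the last entry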
data[-1] = data[-1][:-1]
data.append('')
data.append(']')
with open(FILES['sync'], 'w') as f:
f.write('\n'.join(data))
def execute_sync():
pass
def upload():
pass
def execute_upload():
pass
def get_config():
if not os.path.exists(FILES['config']):
sys.exit('%s not found' % FILES['config'])
with open(FILES['config']) as f:
config = ox.jsonc.load(f)
return config
if __name__ == '__main__':
actions = ['copy', 'organize', 'sync', 'upload']
action_string = '%s or %s' % (', '.join(actions[:-1]), actions[-1])
usage = 'usage: %prog action [volume] [options]'
description = 'Action: %s' % action_string
parser = OptionParser(usage=usage, description=description)
parser.add_option(
'-e', '--encode', action='store_true', dest='encode',
help='encode only, do not upload'
)
parser.add_option(
'-v', '--verbose', action='store_true', dest='verbose',
help='verbose output'
)
parser.add_option(
'-x', '--execute', action='store_true', dest='execute',
help='execute %s' % action_string
)
(opts, args) = parser.parse_args()
if len(args) == 0:
sys.exit(parser.get_usage())
action = args[0]
args = args[1:]
if action == 'copy':
if not opts.execute:
copy()
else:
execute_copy()
elif action == 'organize':
if not opts.execute:
organize()
else:
execute_organize()
elif action == 'sync':
if not opts.execute:
sync()
else:
execute_sync()
elif action == 'upload':
if not opts.execute:
upload()
else:
execute_upload()
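
Note: the organize step writes pandoraclient.organize.jsonc in roughly the shape sketched below. The sections, actions and /* issue */ comments follow the writer code above; the paths themselves are hypothetical.

{

    "automatic": {

        "remove": [

            /* empty directories */
            "E/Example, The Director/The Example (2012)",

            /* system files */
            "E/Example, The Director/.DS_Store"
        ],

        "rename": [

            /* non-canonical filenames */
            [
                "E/Example, The Director/The Example (2012)/The Example.en.srt",
                "E/Example, The Director/The Example (2012)/The Example.srt"
            ]
        ]
    },

    "manual": {

        "rename": [ /* ... (target exists) issues ... */ ],

        "resolve": [ /* duplicate files, missing subtitles, missing video, multiple versions */ ]
    }
}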

pandoraclient.config.jsonc (new file, 11 lines)
{
"cache": "~/.ox/pandoraclient/cache.sqlite",
"media": "~/.ox/pandoraclient/media/",
"password": "",
"subdirectories": true,
"url": "https://0xdb.org/api/",
"username": "",
"volumes": {
"cinema": "/Users/rolux/Movies/Cinema/"
}
}
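
Note: the per-volume index pandoraclient.files.<volume>.json that organize() writes and sync() reads has roughly the following shape; the hash, size and time values here are made up.

{
    "directories": ["A", "B", "E"],
    "files": [
        {"hash": "8e245d9679d31e12", "path": "E/Example, The Director/The Example (2012)/The Example.avi", "size": 734003200, "time": "2012-09-11T16:00:56"},
        {"hash": "a1b2c3d4e5f60718", "path": "E/Example, The Director/The Example (2012)/The Example.srt", "size": 81920, "time": "2012-09-11T16:00:57"}
    ],
    "path": "/Users/rolux/Movies/Cinema/",
    "totals": {"files": 2, "size": 734085120}
}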