From 1b10a096181f1b53dc3f572d35d822d34e31240b Mon Sep 17 00:00:00 2001 From: rolux Date: Tue, 6 Nov 2012 19:35:30 +0100 Subject: [PATCH] don't leave orphaned extras/segments/versions directories --- pandoraclient | 52 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/pandoraclient b/pandoraclient index 155bfa1..738f360 100755 --- a/pandoraclient +++ b/pandoraclient @@ -21,6 +21,7 @@ FILES = { 'organize': 'pandoraclient.organize.jsonc', 'sync': 'pandoraclient.sync.jsonc' } +DIRECTORIES = ['Extras', 'Segments', 'Versions'] def copy(): @@ -64,10 +65,11 @@ def organize(): if data['path'] == volume_path: for file in data['files']: cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash'] - directories = [] + directories_by_path = {} files = [] files_by_hash = {} files_by_item = {} + files_by_path = {} issues = { 'duplicate files': [], 'empty directories': [], @@ -91,9 +93,11 @@ def organize(): for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True): absolute_path = unicodedata.normalize('NFD', absolute_path) relative_path = absolute_path[len(volume_path):] + parts = relative_path.split('/') + length = len(parts) for filename in ox.sorted_strings(filenames): filename = unicodedata.normalize('NFD', filename) - if relative_path != previous_path and len(relative_path.split('/')) == 3: + if relative_path != previous_path and length == 3: print relative_path previous_path = relative_path full_path = os.path.join(absolute_path, filename) @@ -103,10 +107,9 @@ def organize(): else: # unexpected files, unexpected directories parts = path.split('/') - length = len(parts) - if length < 4: + if length < 3: issues['unexpected files'].append(path) - if length <= 4: + if length <= 3: file = {} file['path'] = path file['size'] = os.path.getsize(full_path) @@ -117,13 +120,23 @@ def organize(): if not file['hash'] in files_by_hash: files_by_hash[file['hash']] = [] files_by_hash[file['hash']].append(file) - elif not parts[3] in ['Extras', 'Segments', 'Versions']: + if not relative_path in files_by_path: + files_by_path[relative_path] = [] + files_by_path[relative_path].append(path) + elif parts[3] in DIRECTORIES: + path_key = '/'.join(parts[:3]) + if not path_key in directories_by_path: + directories_by_path[path_key] = [] + directory = '/'.join(parts[:4]) + if not directory in directories_by_path[path_key]: + directories_by_path[path_key].append(directory) + else: issues['unexpected directories'].append(path) # empty directories, missing files if not filenames or not filter(lambda x: not is_system_file(x), filenames): if not dirnames: issues['empty directories'].append(relative_path) - elif len(relative_path.split('/')) == 3: + elif length == 3: issues['missing files'].append(relative_path) print 'Scanning for duplicates' @@ -212,6 +225,29 @@ def organize(): if not paths in issues[exists_key]: issues[exists_key].append(paths) + # orphaned directories + new_path = {} + rename_key = {} + for key in ['incorrect filenames', 'non-canonical filenames']: + for i, paths in enumerate(issues[key]): + new_path[paths[0]] = '/'.join(paths[1].split('/')[:-1]) + if not new_path[paths[0]] in rename_key: + rename_key[new_path[paths[0]]] = key + for path in [path for path in files_by_path if path in directories_by_path]: + new_paths = [] + for path_file in files_by_path[path]: + if path_file in new_path: + new_paths.append(new_path[path_file]) + if len(new_paths) == len(files_by_path[path]) and len(set(new_paths)) == 1 and new_paths[0] != path: + for directory in directories_by_path[path]: + new_directory = os.path.join(new_paths[0], directory.split('/')[-1]) + key = rename_key[new_paths[0]] + exists_key = '%s (target exists)' % key + issues[exists_key if os.path.exists(new_directory) else key].append([directory, new_directory]) + for key in ['incorrect filenames', 'non-canonical filenames']: + for key in [key, '%s (target exists)' % key]: + issues[key] = sorted(issues[key], key=lambda x: x[0].lower()) + for issue in issues: if issues[issue]: if isinstance(issues[issue][0], str): @@ -221,8 +257,8 @@ def organize(): keys = { 'automatic': { 'remove': [ - 'system files', 'empty directories', + 'system files', 'unexpected files', 'unknown extensions' ],