don't leave orphaned extras/segments/versions directories

This commit is contained in:
rolux 2012-11-06 19:35:30 +01:00
parent 97018012a3
commit 1b10a09618

View file

@ -21,6 +21,7 @@ FILES = {
'organize': 'pandoraclient.organize.jsonc',
'sync': 'pandoraclient.sync.jsonc'
}
# Directory names that organize() accepts at index 3 of a file's '/'-split
# path; a file whose parts[3] is not in this list is recorded under
# issues['unexpected directories'].
DIRECTORIES = ['Extras', 'Segments', 'Versions']
def copy():
@ -64,10 +65,11 @@ def organize():
if data['path'] == volume_path:
for file in data['files']:
cache['%s %s %s' % (file['path'], str(file['size']), file['time'])] = file['hash']
directories = []
directories_by_path = {}
files = []
files_by_hash = {}
files_by_item = {}
files_by_path = {}
issues = {
'duplicate files': [],
'empty directories': [],
@ -91,9 +93,11 @@ def organize():
for absolute_path, dirnames, filenames in os.walk(volume_path, followlinks=True):
absolute_path = unicodedata.normalize('NFD', absolute_path)
relative_path = absolute_path[len(volume_path):]
parts = relative_path.split('/')
length = len(parts)
for filename in ox.sorted_strings(filenames):
filename = unicodedata.normalize('NFD', filename)
if relative_path != previous_path and len(relative_path.split('/')) == 3:
if relative_path != previous_path and length == 3:
print relative_path
previous_path = relative_path
full_path = os.path.join(absolute_path, filename)
@ -103,10 +107,9 @@ def organize():
else:
# unexpected files, unexpected directories
parts = path.split('/')
length = len(parts)
if length < 4:
if length < 3:
issues['unexpected files'].append(path)
if length <= 4:
if length <= 3:
file = {}
file['path'] = path
file['size'] = os.path.getsize(full_path)
@ -117,13 +120,23 @@ def organize():
if not file['hash'] in files_by_hash:
files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
elif not parts[3] in ['Extras', 'Segments', 'Versions']:
if not relative_path in files_by_path:
files_by_path[relative_path] = []
files_by_path[relative_path].append(path)
elif parts[3] in DIRECTORIES:
path_key = '/'.join(parts[:3])
if not path_key in directories_by_path:
directories_by_path[path_key] = []
directory = '/'.join(parts[:4])
if not directory in directories_by_path[path_key]:
directories_by_path[path_key].append(directory)
else:
issues['unexpected directories'].append(path)
# empty directories, missing files
if not filenames or not filter(lambda x: not is_system_file(x), filenames):
if not dirnames:
issues['empty directories'].append(relative_path)
elif len(relative_path.split('/')) == 3:
elif length == 3:
issues['missing files'].append(relative_path)
print 'Scanning for duplicates'
@ -212,6 +225,29 @@ def organize():
if not paths in issues[exists_key]:
issues[exists_key].append(paths)
# orphaned directories
new_path = {}
rename_key = {}
for key in ['incorrect filenames', 'non-canonical filenames']:
for i, paths in enumerate(issues[key]):
new_path[paths[0]] = '/'.join(paths[1].split('/')[:-1])
if not new_path[paths[0]] in rename_key:
rename_key[new_path[paths[0]]] = key
for path in [path for path in files_by_path if path in directories_by_path]:
new_paths = []
for path_file in files_by_path[path]:
if path_file in new_path:
new_paths.append(new_path[path_file])
if len(new_paths) == len(files_by_path[path]) and len(set(new_paths)) == 1 and new_paths[0] != path:
for directory in directories_by_path[path]:
new_directory = os.path.join(new_paths[0], directory.split('/')[-1])
key = rename_key[new_paths[0]]
exists_key = '%s (target exists)' % key
issues[exists_key if os.path.exists(new_directory) else key].append([directory, new_directory])
for key in ['incorrect filenames', 'non-canonical filenames']:
for key in [key, '%s (target exists)' % key]:
issues[key] = sorted(issues[key], key=lambda x: x[0].lower())
for issue in issues:
if issues[issue]:
if isinstance(issues[issue][0], str):
@ -221,8 +257,8 @@ def organize():
keys = {
'automatic': {
'remove': [
'system files',
'empty directories',
'system files',
'unexpected files',
'unknown extensions'
],