Add a remote request to retrieve the correct filename, which implied a major refactoring

This commit is contained in:
rolux 2012-09-12 22:30:41 +02:00
parent 2389e76659
commit 6fe330b0c4
1 changed file with 55 additions and 40 deletions

View File

@ -52,11 +52,12 @@ def organize():
config = get_config()
volume = config['volumes'].keys()[0]
volume_path = config['volumes'][volume]
FILES['files'] = FILES['files'] % volume
if isinstance(volume_path, str):
volume_path = volume_path.decode('utf-8')
if volume_path[-1] != '/':
volume_path += '/'
api = ox.API(config['url'])
FILES['files'] = FILES['files'] % volume
cache = {}
if os.path.exists(FILES['files']):
data = json.load(open(FILES['files']))
@ -98,31 +99,21 @@ def organize():
if is_system_file(filename):
issues['system files'].append(path)
else:
file = ox.movie.parse_path(remove_directory(path))
file['normalizedPath'] = add_directory(file['normalizedPath'])
file['path'] = path
file['size'] = os.path.getsize(full_path)
file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
files.append(file)
if not file['hash'] in files_by_hash:
files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
# unexpected files, unknown extensions
# unexpected files
length = len(path.split('/'))
if length < 4:
issues['unexpected files'].append(path)
elif length == 4:
# unknown extensions
if not file['type']:
issues['unknown extensions'].append(path)
else:
# add parsed title to item key so that episodes are their own item
key = '%s/%s' % ('/'.join(file['normalizedPath'].split('/')[:-1]), file['title'])
if not key in files_by_item:
files_by_item[key] = []
files_by_item[key].append(file)
if length <= 4:
file = {}
file['path'] = path
file['size'] = os.path.getsize(full_path)
file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
files.append(file)
if not file['hash'] in files_by_hash:
files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
if not dirnames and (not filenames or not filter(lambda x: not is_system_file(x), filenames)):
issues['empty directories'].append(relative_path)
@ -132,8 +123,25 @@ def organize():
issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
print 'Scanning for inconsistencies'
for key in sorted(files_by_item):
item_files = files_by_item[key]
paths = api.getPath({'id': files_by_hash.keys()})['data']
for file in files:
file = dict(file, **ox.movie.parse_path(remove_directory(file['path'])))
file['normalizedPath'] = add_directory(file['normalizedPath'])
# unknown extensions
if not file['type']:
issues['unknown extensions'].append(file['path'])
path_key = 'normalizedPath'
if file['hash'] in paths and paths[file['hash']] != remove_directory(file['path']):
file['correctedPath'] = add_directory(paths[file['hash']])
path_key = 'correctedPath'
if len(path.split('/')) == 4:
file['item'] = '%s/%s' % ('/'.join(file[path_key].split('/')[:-1]), file['title'])
if not file['item'] in files_by_item:
files_by_item[file['item']] = []
files_by_item[file['item']].append(file)
for item in sorted(files_by_item):
item_files = files_by_item[item]
# missing video, missing subtitles, multiple versions
versions = ox.movie.parse_item_files(item_files)
main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
@ -147,11 +155,14 @@ def organize():
issues['missing subtitles'].append([file['path'] for file in main_files])
if other_files:
issues['multiple versions'].append([file['path'] for file in main_files + other_files])
# non-canonical filenames
# incorrect filenames, non-canonical filenames
for version in versions:
path_key = {}
for file in version['files']:
path_key[file['path']] = 'correctedPath' if 'correctedPath' in file else 'normalizedPath'
rename = [[
file['path'], file['normalizedPath']
] for file in version['files'] if file['path'] != file['normalizedPath']]
file['path'], file[path_key[file['path']]]
] for file in version['files'] if file['path'] != file[path_key[file['path']]]]
languages = {'idx': [], 'srt': [], 'sub': []}
for extension in languages:
languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension])
@ -164,29 +175,33 @@ def organize():
rename += [[
file['path'], re.sub(regexp, '', file['path'])
] for file in version['files'] if re.search(regexp, file['path'])]
if rename:
issues['non-canonical filenames'] += rename
for paths in rename:
issues['%s filenames' % (
'incorrect' if path_key[paths[0]] == 'correctedPath' else 'non-canonical'
)].append(paths)
print 'Scanning for conflicts'
existing_paths = [file['path'] for file in files]
for key in ['incorrect filenames', 'non-canonical filenames']:
key_exists = '%s (target exists)' % key
new_paths = [paths[1] for paths in issues[key]]
exists_key = '%s (target exists)' % key
path_count = {}
for path in [paths[1] for paths in issues[key]]:
path_count[path] = 1 if not path in path_count else path_count[path] + 1
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if new_paths.count(paths[1]) > 1:
# multiple files with the same normalized path
if path_count[paths[1]] > 1:
# multiple files with the same target path
issues[key].remove(paths)
if not paths in issues[key_exists]:
issues[key_exists].append(paths)
if not paths in issues[exists_key]:
issues[exists_key].append(paths)
elif not filenames[1] in existing_paths:
# normalized path does not exist, remove original path from existing paths
# target path does not exist, remove original path from existing paths
existing_paths.remove(paths[0])
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if paths[1] in existing_paths:
# normalized path exists
# target path exists
issues[key].remove(paths)
if not paths in issues[key_exists]:
issues[key_exists].append(paths)
if not paths in issues[exists_key]:
issues[exists_key].append(paths)
for issue in issues:
if issues[issue]: