add remote request to retrieve the correct filename, which required major refactoring

This commit is contained in:
rolux 2012-09-12 22:30:41 +02:00
parent 2389e76659
commit 6fe330b0c4

View file

@ -52,11 +52,12 @@ def organize():
config = get_config() config = get_config()
volume = config['volumes'].keys()[0] volume = config['volumes'].keys()[0]
volume_path = config['volumes'][volume] volume_path = config['volumes'][volume]
FILES['files'] = FILES['files'] % volume
if isinstance(volume_path, str): if isinstance(volume_path, str):
volume_path = volume_path.decode('utf-8') volume_path = volume_path.decode('utf-8')
if volume_path[-1] != '/': if volume_path[-1] != '/':
volume_path += '/' volume_path += '/'
api = ox.API(config['url'])
FILES['files'] = FILES['files'] % volume
cache = {} cache = {}
if os.path.exists(FILES['files']): if os.path.exists(FILES['files']):
data = json.load(open(FILES['files'])) data = json.load(open(FILES['files']))
@ -98,31 +99,21 @@ def organize():
if is_system_file(filename): if is_system_file(filename):
issues['system files'].append(path) issues['system files'].append(path)
else: else:
file = ox.movie.parse_path(remove_directory(path)) # unexpected files
file['normalizedPath'] = add_directory(file['normalizedPath'])
file['path'] = path
file['size'] = os.path.getsize(full_path)
file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
files.append(file)
if not file['hash'] in files_by_hash:
files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
# unexpected files, unknown extensions
length = len(path.split('/')) length = len(path.split('/'))
if length < 4: if length < 4:
issues['unexpected files'].append(path) issues['unexpected files'].append(path)
elif length == 4: if length <= 4:
# unknown extensions file = {}
if not file['type']: file['path'] = path
issues['unknown extensions'].append(path) file['size'] = os.path.getsize(full_path)
else: file['time'] = datetime.datetime.utcfromtimestamp(os.stat(full_path).st_mtime).isoformat()
# add parsed title to item key so that episodes are their own item key = '%s %s %s' % (file['path'], str(file['size']), file['time'])
key = '%s/%s' % ('/'.join(file['normalizedPath'].split('/')[:-1]), file['title']) file['hash'] = cache[key] if key in cache else ox.oshash(full_path)
if not key in files_by_item: files.append(file)
files_by_item[key] = [] if not file['hash'] in files_by_hash:
files_by_item[key].append(file) files_by_hash[file['hash']] = []
files_by_hash[file['hash']].append(file)
if not dirnames and (not filenames or not filter(lambda x: not is_system_file(x), filenames)): if not dirnames and (not filenames or not filter(lambda x: not is_system_file(x), filenames)):
issues['empty directories'].append(relative_path) issues['empty directories'].append(relative_path)
@ -132,8 +123,25 @@ def organize():
issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash]))) issues['duplicate files'].append(sorted(map(lambda x: x['path'], files_by_hash[hash])))
print 'Scanning for inconsistencies' print 'Scanning for inconsistencies'
for key in sorted(files_by_item): paths = api.getPath({'id': files_by_hash.keys()})['data']
item_files = files_by_item[key] for file in files:
file = dict(file, **ox.movie.parse_path(remove_directory(file['path'])))
file['normalizedPath'] = add_directory(file['normalizedPath'])
# unknown extensions
if not file['type']:
issues['unknown extensions'].append(file['path'])
path_key = 'normalizedPath'
if file['hash'] in paths and paths[file['hash']] != remove_directory(file['path']):
file['correctedPath'] = add_directory(paths[file['hash']])
path_key = 'correctedPath'
if len(path.split('/')) == 4:
file['item'] = '%s/%s' % ('/'.join(file[path_key].split('/')[:-1]), file['title'])
if not file['item'] in files_by_item:
files_by_item[file['item']] = []
files_by_item[file['item']].append(file)
for item in sorted(files_by_item):
item_files = files_by_item[item]
# missing video, missing subtitles, multiple versions # missing video, missing subtitles, multiple versions
versions = ox.movie.parse_item_files(item_files) versions = ox.movie.parse_item_files(item_files)
main_files = sum([version['files'] for version in versions if version['isMainVersion']], []) main_files = sum([version['files'] for version in versions if version['isMainVersion']], [])
@ -147,11 +155,14 @@ def organize():
issues['missing subtitles'].append([file['path'] for file in main_files]) issues['missing subtitles'].append([file['path'] for file in main_files])
if other_files: if other_files:
issues['multiple versions'].append([file['path'] for file in main_files + other_files]) issues['multiple versions'].append([file['path'] for file in main_files + other_files])
# non-canonical filenames # incorrect filenames, non-canonical filenames
for version in versions: for version in versions:
path_key = {}
for file in version['files']:
path_key[file['path']] = 'correctedPath' if 'correctedPath' in file else 'normalizedPath'
rename = [[ rename = [[
file['path'], file['normalizedPath'] file['path'], file[path_key[file['path']]]
] for file in version['files'] if file['path'] != file['normalizedPath']] ] for file in version['files'] if file['path'] != file[path_key[file['path']]]]
languages = {'idx': [], 'srt': [], 'sub': []} languages = {'idx': [], 'srt': [], 'sub': []}
for extension in languages: for extension in languages:
languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension]) languages[extension] = set([file['language'] for file in version['files'] if file['extension'] == extension])
@ -164,29 +175,33 @@ def organize():
rename += [[ rename += [[
file['path'], re.sub(regexp, '', file['path']) file['path'], re.sub(regexp, '', file['path'])
] for file in version['files'] if re.search(regexp, file['path'])] ] for file in version['files'] if re.search(regexp, file['path'])]
if rename: for paths in rename:
issues['non-canonical filenames'] += rename issues['%s filenames' % (
'incorrect' if path_key[paths[0]] == 'correctedPath' else 'non-canonical'
)].append(paths)
print 'Scanning for conflicts' print 'Scanning for conflicts'
existing_paths = [file['path'] for file in files] existing_paths = [file['path'] for file in files]
for key in ['incorrect filenames', 'non-canonical filenames']: for key in ['incorrect filenames', 'non-canonical filenames']:
key_exists = '%s (target exists)' % key exists_key = '%s (target exists)' % key
new_paths = [paths[1] for paths in issues[key]] path_count = {}
for path in [paths[1] for paths in issues[key]]:
path_count[path] = 1 if not path in path_count else path_count[path] + 1
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]: for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if new_paths.count(paths[1]) > 1: if path_count[paths[1]] > 1:
# multiple files with the same normalized path # multiple files with the same target path
issues[key].remove(paths) issues[key].remove(paths)
if not paths in issues[key_exists]: if not paths in issues[exists_key]:
issues[key_exists].append(paths) issues[exists_key].append(paths)
elif not filenames[1] in existing_paths: elif not filenames[1] in existing_paths:
# normalized path does not exist, remove original path from existing paths # target path does not exist, remove original path from existing paths
existing_paths.remove(paths[0]) existing_paths.remove(paths[0])
for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]: for paths in [paths for paths in issues[key] if paths[0].lower() != paths[1].lower()]:
if paths[1] in existing_paths: if paths[1] in existing_paths:
# normalized path exists # target path exists
issues[key].remove(paths) issues[key].remove(paths)
if not paths in issues[key_exists]: if not paths in issues[exists_key]:
issues[key_exists].append(paths) issues[exists_key].append(paths)
for issue in issues: for issue in issues:
if issues[issue]: if issues[issue]: