From f191e766aed2e9713d6ab2763eecc0e701a58364 Mon Sep 17 00:00:00 2001
From: j
Date: Mon, 5 Sep 2016 16:47:02 +0200
Subject: [PATCH] =?UTF-8?q?fix=20matching=20=5F=20in=20unicode=20string:?=
 =?UTF-8?q?=20Re=CC=81alite=CC=81=5FLogique=20->=20Re=CC=81alite=CC=81/Log?=
 =?UTF-8?q?ique?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ox/movie.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/ox/movie.py b/ox/movie.py
index bb93101..c46605b 100644
--- a/ox/movie.py
+++ b/ox/movie.py
@@ -193,20 +193,24 @@ def parse_path(path, directory_key='director'):
             if string in EXTENSIONS[type]:
                 return type
         return None
+
     def parse_underscores(string):
+        string = unicodedata.normalize('NFC', string)
         # '^_' or '_$' is '.'
         string = re.sub('^_', '.', string)
         string = re.sub('_$', '.', string)
         # '_.foo$' or '_ (' is '?'
-        string = re.sub('_(?=(\.\w+$| \())', '?', string)
+        string = re.sub(re.compile('_(?=(\.\w+$| \())', re.U), '?', string)
         # ' _..._ ' is '<...>'
         string = re.sub('(?<= )_(.+)_(?= )', '<\g<1>>', string)
         # 'foo_bar' or 'foo _ bar' is '/'
-        string = re.sub('(?<=\w)_(?=\w)', '/', string)
+        string = re.sub(re.compile('(?<=\w)_(?=\w)', re.U), '/', string)
         string = re.sub(' _ ', ' / ', string)
         # 'foo_ ' is ':'
-        string = re.sub('(?<=\w)_ ', ': ', string)
+        string = re.sub(re.compile('(?<=\w)_ ', re.U), ': ', string)
+        string = unicodedata.normalize('NFD', string)
         return string
+
     data = {}
     parts = list(map(lambda x: parse_underscores(x.strip()), unicodedata.normalize('NFD', path).split('/')))
     # subdirectory