fix matching _ in unicode string: Réalité_Logique -> Réalité/Logique

This commit is contained in:
j 2016-09-05 16:47:02 +02:00
parent 06e95b816a
commit f191e766ae

View file

@ -193,20 +193,24 @@ def parse_path(path, directory_key='director'):
if string in EXTENSIONS[type]: if string in EXTENSIONS[type]:
return type return type
return None return None
def parse_underscores(string):
    """Replace placeholder underscores in one path component.

    Rules, applied in this order:

    * a leading or trailing ``_`` becomes ``.``
    * ``_`` directly before a final ``.ext`` or before `` (`` becomes ``?``
    * `` _..._ `` (underscore-wrapped text between spaces) becomes ``<...>``
    * ``_`` between two word characters becomes ``/``
    * `` _ `` becomes `` / ``
    * ``_`` after a word character and before a space becomes ``: ``

    The text is composed to NFC before matching and decomposed to NFD
    again before returning.

    :param string: unicode text of a single path component
    :returns: the rewritten text, NFD-normalized
    """
    # In NFD an accented letter is a base letter followed by a combining
    # mark, and a combining mark is not matched by \w.  Composing to NFC
    # first lets the \w look-arounds see 'é' as one word character, so
    # 'Réalité_Logique' becomes 'Réalité/Logique'.
    string = unicodedata.normalize('NFC', string)
    # '^_' or '_$' is '.'
    string = re.sub(r'^_', '.', string)
    string = re.sub(r'_$', '.', string)
    # '_.foo$' or '_ (' is '?'
    string = re.sub(r'_(?=(\.\w+$| \())', '?', string, flags=re.UNICODE)
    # ' _..._ ' is '<...>'
    string = re.sub(r'(?<= )_(.+)_(?= )', r'<\g<1>>', string)
    # 'foo_bar' or 'foo _ bar' is '/'
    string = re.sub(r'(?<=\w)_(?=\w)', '/', string, flags=re.UNICODE)
    string = re.sub(r' _ ', ' / ', string)
    # 'foo_ ' is ':'
    string = re.sub(r'(?<=\w)_ ', ': ', string, flags=re.UNICODE)
    # Return NFD, the form in which the caller supplies the components.
    return unicodedata.normalize('NFD', string)
data = {} data = {}
parts = list(map(lambda x: parse_underscores(x.strip()), unicodedata.normalize('NFD', path).split('/'))) parts = list(map(lambda x: parse_underscores(x.strip()), unicodedata.normalize('NFD', path).split('/')))
# subdirectory # subdirectory