better is_asian_name
This commit is contained in:
parent
fdc68a85e2
commit
9b4cb6fe86
1 changed file with 37 additions and 11 deletions
48
ox/text.py
48
ox/text.py
|
@ -65,7 +65,7 @@ ASIAN_FIRST_NAMES = [
|
||||||
]
|
]
|
||||||
# see http://en.wikipedia.org/wiki/List_of_common_Chinese_surnames
|
# see http://en.wikipedia.org/wiki/List_of_common_Chinese_surnames
|
||||||
# and http://en.wikipedia.org/wiki/List_of_Korean_family_names
|
# and http://en.wikipedia.org/wiki/List_of_Korean_family_names
|
||||||
ASIAN_NAMES = [
|
ASIAN_LAST_NAMES = [
|
||||||
'chan', 'chang', 'chao',
|
'chan', 'chang', 'chao',
|
||||||
'chen', 'cheong', 'cheung',
|
'chen', 'cheong', 'cheung',
|
||||||
'chong', 'choo',
|
'chong', 'choo',
|
||||||
|
@ -306,11 +306,34 @@ def get_sort_name(name):
|
||||||
return len(first_names) and first_names[-1].lower() in names
|
return len(first_names) and first_names[-1].lower() in names
|
||||||
|
|
||||||
if is_asian_name(name):
|
if is_asian_name(name):
|
||||||
|
hyphen = -1
|
||||||
names = name.split(' ')
|
names = name.split(' ')
|
||||||
if '-' in names[0]:
|
if len(names) == 2:
|
||||||
firstnames = names[0].split('-')
|
|
||||||
names[0] = '-'.join([firstnames[0], firstnames[1].lower()])
|
for i, name in enumerate(names):
|
||||||
return names[-1] + ' ' + ' '.join(names[:-1])
|
if '-' in name:
|
||||||
|
return
|
||||||
|
names = name.replace('-').split(' ')
|
||||||
|
if len(names) == 2:
|
||||||
|
if names[0].lower() in ASIAN_LAST_NAMES:
|
||||||
|
lastname, firstname = names
|
||||||
|
else:
|
||||||
|
firstname, lastname = names
|
||||||
|
else:
|
||||||
|
names_ = name.split(' ')
|
||||||
|
if '-' in names_[0]:
|
||||||
|
lastname, firstname = [names[2], names[0] + '-' + names[1].lower()]
|
||||||
|
elif '-' in names_[1]:
|
||||||
|
lastname, firstname = [names[0], names[1] = '-' + names[2].lower()]
|
||||||
|
elif names[0] in ASIAN_FIRST_NAMES and names[2] not in ASIAN_FIRST_NAMES:
|
||||||
|
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
|
||||||
|
elif names[0] not in ASIAN_FIRST_NAMES and names[2] in ASIAN_FIRST_NAMES:
|
||||||
|
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
|
||||||
|
elif names[0] in ASIAN_LAST_NAMES:
|
||||||
|
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
|
||||||
|
else:
|
||||||
|
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
|
||||||
|
return '{} {}'.format(lastname, firstname)
|
||||||
|
|
||||||
first_names = name.split(' ')
|
first_names = name.split(' ')
|
||||||
last_names = []
|
last_names = []
|
||||||
|
@ -328,7 +351,7 @@ def get_sort_name(name):
|
||||||
add_name()
|
add_name()
|
||||||
name = ' '.join(last_names)
|
name = ' '.join(last_names)
|
||||||
if len(first_names):
|
if len(first_names):
|
||||||
separator = ' ' if last_names[0].lower() in ASIAN_NAMES else ', '
|
separator = ' ' if last_names[0].lower() in ASIAN_LAST_NAMES else ', '
|
||||||
name += separator + ' '.join(first_names)
|
name += separator + ' '.join(first_names)
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
@ -385,11 +408,14 @@ def find_string(string, string0='', string1=''):
|
||||||
|
|
||||||
def is_asian_name(name):
    """Return True if ``name`` matches the Asian-name patterns below.

    The name is lower-cased and split on spaces, with hyphens treated as
    spaces; the resulting parts are looked up in the module-level
    ``ASIAN_FIRST_NAMES`` and ``ASIAN_LAST_NAMES`` lists:

    - two parts, and no hyphen in ``name``: one part must be a listed
      first name and the other a listed last name, in either order;
    - three parts: the middle part must be a listed first name, and so
      must the first or the last part.

    Any other number of parts yields False.
    """
    names = name.replace('-', ' ').lower().split(' ')
    if len(names) == 2:
        # A lone hyphenated pair ('Xx-Yy') is rejected; hyphens are only
        # accepted in names that split into three parts.
        if '-' in name:
            return False
        return (
            (names[0] in ASIAN_FIRST_NAMES and names[1] in ASIAN_LAST_NAMES) or
            (names[0] in ASIAN_LAST_NAMES and names[1] in ASIAN_FIRST_NAMES)
        )
    if len(names) == 3:
        return names[1] in ASIAN_FIRST_NAMES and (
            names[0] in ASIAN_FIRST_NAMES or names[2] in ASIAN_FIRST_NAMES
        )
    return False
|
||||||
|
|
||||||
def parse_useragent(useragent):
|
def parse_useragent(useragent):
|
||||||
data = {}
|
data = {}
|
||||||
|
|
Loading…
Reference in a new issue