better is_asian_name
This commit is contained in:
parent
fdc68a85e2
commit
9b4cb6fe86
1 changed file with 37 additions and 11 deletions
48
ox/text.py
48
ox/text.py
|
@ -65,7 +65,7 @@ ASIAN_FIRST_NAMES = [
|
|||
]
|
||||
# see http://en.wikipedia.org/wiki/List_of_common_Chinese_surnames
|
||||
# and http://en.wikipedia.org/wiki/List_of_Korean_family_names
|
||||
ASIAN_NAMES = [
|
||||
ASIAN_LAST_NAMES = [
|
||||
'chan', 'chang', 'chao',
|
||||
'chen', 'cheong', 'cheung',
|
||||
'chong', 'choo',
|
||||
|
@ -306,11 +306,34 @@ def get_sort_name(name):
|
|||
return len(first_names) and first_names[-1].lower() in names
|
||||
|
||||
if is_asian_name(name):
|
||||
hyphen = -1
|
||||
names = name.split(' ')
|
||||
if '-' in names[0]:
|
||||
firstnames = names[0].split('-')
|
||||
names[0] = '-'.join([firstnames[0], firstnames[1].lower()])
|
||||
return names[-1] + ' ' + ' '.join(names[:-1])
|
||||
if len(names) == 2:
|
||||
|
||||
for i, name in enumerate(names):
|
||||
if '-' in name:
|
||||
return
|
||||
names = name.replace('-').split(' ')
|
||||
if len(names) == 2:
|
||||
if names[0].lower() in ASIAN_LAST_NAMES:
|
||||
lastname, firstname = names
|
||||
else:
|
||||
firstname, lastname = names
|
||||
else:
|
||||
names_ = name.split(' ')
|
||||
if '-' in names_[0]:
|
||||
lastname, firstname = [names[2], names[0] + '-' + names[1].lower()]
|
||||
elif '-' in names_[1]:
|
||||
lastname, firstname = [names[0], names[1] = '-' + names[2].lower()]
|
||||
elif names[0] in ASIAN_FIRST_NAMES and names[2] not in ASIAN_FIRST_NAMES:
|
||||
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
|
||||
elif names[0] not in ASIAN_FIRST_NAMES and names[2] in ASIAN_FIRST_NAMES:
|
||||
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
|
||||
elif names[0] in ASIAN_LAST_NAMES:
|
||||
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
|
||||
else:
|
||||
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
|
||||
return '{} {}'.format(lastname, firstname)
|
||||
|
||||
first_names = name.split(' ')
|
||||
last_names = []
|
||||
|
@ -328,7 +351,7 @@ def get_sort_name(name):
|
|||
add_name()
|
||||
name = ' '.join(last_names)
|
||||
if len(first_names):
|
||||
separator = ' ' if last_names[0].lower() in ASIAN_NAMES else ', '
|
||||
separator = ' ' if last_names[0].lower() in ASIAN_LAST_NAMES else ', '
|
||||
name += separator + ' '.join(first_names)
|
||||
return name
|
||||
|
||||
|
@ -385,11 +408,14 @@ def find_string(string, string0='', string1=''):
|
|||
|
||||
def is_asian_name(name):
    """Return True if ``name`` looks like an Asian personal name.

    The name is lower-cased and split on single spaces, with hyphens
    treated as separators; the resulting parts are looked up in
    ``ASIAN_FIRST_NAMES`` and ``ASIAN_LAST_NAMES``:

    * two parts, and no hyphen in ``name``: one part must be a known first
      name and the other a known last name, in either order;
    * three parts: the middle part must be a known first name, and so must
      at least one of its neighbours.

    Any other number of parts yields False.
    """
    names = name.replace('-', ' ').lower().split(' ')
    if len(names) == 2:
        # A lone hyphenated word also splits into two parts, but the
        # two-part rule only applies to two space-separated words.
        if '-' in name:
            return False
        first, second = names
        return (
            (first in ASIAN_FIRST_NAMES and second in ASIAN_LAST_NAMES) or
            (first in ASIAN_LAST_NAMES and second in ASIAN_FIRST_NAMES)
        )
    if len(names) == 3:
        return names[1] in ASIAN_FIRST_NAMES and (
            names[0] in ASIAN_FIRST_NAMES or names[2] in ASIAN_FIRST_NAMES
        )
    return False
|
||||
|
||||
def parse_useragent(useragent):
|
||||
data = {}
|
||||
|
|
Loading…
Reference in a new issue