better is_asian_name

This commit is contained in:
rlx 2017-08-02 19:46:40 +02:00
parent fdc68a85e2
commit 9b4cb6fe86

View file

@ -65,7 +65,7 @@ ASIAN_FIRST_NAMES = [
]
# see http://en.wikipedia.org/wiki/List_of_common_Chinese_surnames
# and http://en.wikipedia.org/wiki/List_of_Korean_family_names
ASIAN_NAMES = [
ASIAN_LAST_NAMES = [
'chan', 'chang', 'chao',
'chen', 'cheong', 'cheung',
'chong', 'choo',
@ -306,11 +306,34 @@ def get_sort_name(name):
return len(first_names) and first_names[-1].lower() in names
if is_asian_name(name):
hyphen = -1
names = name.split(' ')
if '-' in names[0]:
firstnames = names[0].split('-')
names[0] = '-'.join([firstnames[0], firstnames[1].lower()])
return names[-1] + ' ' + ' '.join(names[:-1])
if len(names) == 2:
for i, name in enumerate(names):
if '-' in name:
return
names = name.replace('-').split(' ')
if len(names) == 2:
if names[0].lower() in ASIAN_LAST_NAMES:
lastname, firstname = names
else:
firstname, lastname = names
else:
names_ = name.split(' ')
if '-' in names_[0]:
lastname, firstname = [names[2], names[0] + '-' + names[1].lower()]
elif '-' in names_[1]:
lastname, firstname = [names[0], names[1] = '-' + names[2].lower()]
elif names[0] in ASIAN_FIRST_NAMES and names[2] not in ASIAN_FIRST_NAMES:
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
elif names[0] not in ASIAN_FIRST_NAMES and names[2] in ASIAN_FIRST_NAMES:
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
elif names[0] in ASIAN_LAST_NAMES:
lastname, firstname = [names[0], names[1] + ' ' + names[2]]
else:
lastname, firstname = [names[2], names[0] + ' ' + names[1]]
return '{} {}'.format(lastname, firstname)
first_names = name.split(' ')
last_names = []
@ -328,7 +351,7 @@ def get_sort_name(name):
add_name()
name = ' '.join(last_names)
if len(first_names):
separator = ' ' if last_names[0].lower() in ASIAN_NAMES else ', '
separator = ' ' if last_names[0].lower() in ASIAN_LAST_NAMES else ', '
name += separator + ' '.join(first_names)
return name
@ -385,11 +408,14 @@ def find_string(string, string0='', string1=''):
def is_asian_name(name):
    """Return True if *name* matches one of the Asian-name patterns.

    The name is lower-cased and split on spaces and hyphens, then each
    part is looked up in the module-level ASIAN_FIRST_NAMES and
    ASIAN_LAST_NAMES collections.

    Two shapes are recognized:

    * two parts and no hyphen anywhere in *name*: one part is in
      ASIAN_FIRST_NAMES and the other is in ASIAN_LAST_NAMES (either
      order);
    * three parts: the middle part is in ASIAN_FIRST_NAMES and at least
      one of the outer parts is in ASIAN_FIRST_NAMES as well.

    Any other number of parts yields False.
    """
    names = name.replace('-', ' ').lower().split(' ')

    if len(names) == 2:
        # A hyphenated pair such as "kar-wai" also splits into two parts,
        # but is not a complete first/last name combination.
        if '-' in name:
            return False
        first, second = names
        return (
            (first in ASIAN_FIRST_NAMES and second in ASIAN_LAST_NAMES) or
            (first in ASIAN_LAST_NAMES and second in ASIAN_FIRST_NAMES)
        )

    if len(names) == 3:
        # The middle part must be a known first name, paired with a known
        # first name on at least one side.
        return names[1] in ASIAN_FIRST_NAMES and (
            names[0] in ASIAN_FIRST_NAMES or names[2] in ASIAN_FIRST_NAMES
        )

    return False
def parse_useragent(useragent):
data = {}