Prepare for later Asian name fixes

This commit is contained in:
j 2017-08-03 15:26:56 +02:00
parent 981b56c87d
commit 35198f8787
1 changed files with 36 additions and 18 deletions

View File

@ -26,6 +26,28 @@ def normalize_value(value):
value = unicodedata.normalize('NFD', value)
return value
def fix_asian_name(name):
    """Return ``ox.get_sort_name(name)`` when ``ox.is_asian_name`` accepts it.

    Any name that ``ox.is_asian_name`` does not flag is handed back
    unchanged.
    """
    return ox.get_sort_name(name) if ox.is_asian_name(name) else name
def quote_string(string):
    """Percent-quote *string*, then swap the roles of space and underscore.

    After quoting, literal underscores are rewritten to ``%09`` and quoted
    spaces (``%20``) are rewritten to underscores.
    """
    quoted = quote(string)
    # Rewrite real underscores first so they are not confused with the
    # underscores that stand in for spaces afterwards.
    without_underscores = quoted.replace('_', '%09')
    return without_underscores.replace('%20', '_')
def fix_names(m):
    """Build the ``/name=`` anchor that replaces a matched name link.

    *m* is a regex match whose second group is the link text. That text is
    passed through ``fix_asian_name`` and the result is used both as the
    visible label and, UTF-8 encoded and quoted, as the link target.
    """
    label = fix_asian_name(m.group(2))
    target = quote_string(label.encode('utf-8'))
    return '<a href="/name=%s">%s</a>' % (target, label)
def fix_titles(m):
    """Build the ``/title=`` anchor that replaces a matched title link.

    *m* is a regex match whose second group is the link text; it becomes
    the visible label and, UTF-8 encoded and quoted, the link target.
    """
    label = m.group(2)
    target = quote_string(label.encode('utf-8'))
    return '<a href="/title=%s">%s</a>' % (target, label)
def fix_links(t):
    """Rewrite ``/name/...`` and ``/title/...`` anchors found in *t*.

    Name links are rewritten first via ``fix_names``, then title links via
    ``fix_titles``; the resulting text is returned.
    """
    rewrites = (
        ('<a href="(/name/.*?)">(.*?)</a>', fix_names),
        ('<a href="(/title/.*?)">(.*?)</a>', fix_titles),
    )
    for pattern, build_anchor in rewrites:
        t = re.sub(pattern, build_anchor, t)
    return t
def find(info, guess=True):
q = Imdb.objects.all()
if 'id' in info:
@ -137,30 +159,26 @@ class Imdb(models.Model):
data = ox.web.imdb.Imdb(self.imdb, timeout=timeout)
data = self.apply_patch(data)
def quote_string(string):
    """Quote *string*, mapping underscores to ``%09`` and ``%20`` to ``_``."""
    result = quote(string)
    # Order matters: original underscores must be rewritten before '%20'
    # is turned into '_'.
    for old, new in (('_', '%09'), ('%20', '_')):
        result = result.replace(old, new)
    return result
def fix_names(m):
    """Return a ``/name=`` anchor built from the second group of match *m*."""
    text = m.group(2)
    href = quote_string(text.encode('utf-8'))
    return '<a href="/name=%s">%s</a>' % (href, text)
def fix_titles(m):
    """Return a ``/title=`` anchor built from the second group of match *m*."""
    text = m.group(2)
    href = quote_string(text.encode('utf-8'))
    return '<a href="/title=%s">%s</a>' % (href, text)
def fix_links(t):
    """Replace name anchors, then title anchors, in *t* and return the text."""
    with_names = re.sub('<a href="(/name/.*?)">(.*?)</a>', fix_names, t)
    return re.sub('<a href="(/title/.*?)">(.*?)</a>', fix_titles, with_names)
if 'trivia' in data:
data['trivia'] = [fix_links(t) for t in data['trivia']]
if 'summary' in data:
data['summary'] = fix_links(data['summary'])
'''
for key in ('director', 'actor', 'writer', 'producer', 'editor', 'composer'):
if key in data:
data[key] = [fix_asian_name(name) for name in data[key]]
if 'cast' in data:
for c in data['cast']:
c['actor'] = fix_asian_name(c['actor'])
if 'credits' in data:
for c in data['credits']:
c['name'] = fix_asian_name(c['name'])
'''
if 'rating' in data:
data['rating'] = float(data['rating']) * 10