From 35198f8787d16d7bc5e221847575330b5180e111 Mon Sep 17 00:00:00 2001 From: j Date: Thu, 3 Aug 2017 15:26:56 +0200 Subject: [PATCH] prepare for later asian name fixes --- oxdata/movie/models.py | 54 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/oxdata/movie/models.py b/oxdata/movie/models.py index c0f1f8c..d6c928c 100644 --- a/oxdata/movie/models.py +++ b/oxdata/movie/models.py @@ -26,6 +26,28 @@ def normalize_value(value): value = unicodedata.normalize('NFD', value) return value +def fix_asian_name(name): + if ox.is_asian_name(name): + name = ox.get_sort_name(name) + return name + +def quote_string(string): + return quote(string).replace('_', '%09').replace('%20', '_') + +def fix_names(m): + name = fix_asian_name(m.group(2)) + return '%s' % (quote_string(name.encode('utf-8')), name) + +def fix_titles(m): + return '%s' % ( + quote_string(m.group(2).encode('utf-8')), m.group(2) + ) + +def fix_links(t): + t = re.sub('(.*?)', fix_names, t) + t = re.sub('(.*?)', fix_titles, t) + return t + def find(info, guess=True): q = Imdb.objects.all() if 'id' in info: @@ -137,30 +159,26 @@ class Imdb(models.Model): data = ox.web.imdb.Imdb(self.imdb, timeout=timeout) data = self.apply_patch(data) - def quote_string(string): - return quote(string).replace('_', '%09').replace('%20', '_') - - def fix_names(m): - return '%s' % ( - quote_string(m.group(2).encode('utf-8')), m.group(2) - ) - - def fix_titles(m): - return '%s' % ( - quote_string(m.group(2).encode('utf-8')), m.group(2) - ) - - def fix_links(t): - t = re.sub('(.*?)', fix_names, t) - t = re.sub('(.*?)', fix_titles, t) - return t - if 'trivia' in data: data['trivia'] = [fix_links(t) for t in data['trivia']] if 'summary' in data: data['summary'] = fix_links(data['summary']) + ''' + for key in ('director', 'actor', 'writer', 'producer', 'editor', 'composer'): + if key in data: + data[key] = [fix_asian_name(name) for name in data[key]] + + if 'cast' in data: + for c in data['cast']: + c['actor'] = fix_asian_name(c['actor']) + + if 'credits' in data: + for c in data['credits']: + c['name'] = fix_asian_name(c['name']) + ''' + if 'rating' in data: data['rating'] = float(data['rating']) * 10