From 5482248feef2b4c7294d66d33f1758d02c39a9f2 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sun, 26 Aug 2012 16:22:18 +0200 Subject: [PATCH] normalize imdb values to make find more reliable for unicode titles, fixes #930 --- oxdata/movie/models.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/oxdata/movie/models.py b/oxdata/movie/models.py index 0336ea2..2e69f87 100644 --- a/oxdata/movie/models.py +++ b/oxdata/movie/models.py @@ -2,10 +2,15 @@ # vi:si:et:sw=4:sts=4:ts=4 import re +import unicodedata from django.db import models import ox +def normalize_value(value): + if isinstance(value, str): + value = value.decode('utf-8') + return unicodedata.normalize('NFD', value) def find(info, guess=True): q = Imdb.objects.all() @@ -26,9 +31,10 @@ def find(info, guess=True): fkey = key if isinstance(info[key], list): fkey = '%s__iexact'%key - q = q.filter(**{fkey: '\n'.join(info[key]) + '\n'}) + value = normalize_value(u'\n'.join(info[key]) + '\n') else: - q = q.filter(**{fkey:info[key]}) + value = normalize_value(info[key]) + q = q.filter(**{fkey:value}) if q.count() == 1: m = q[0] m.update() @@ -75,7 +81,9 @@ class Imdb(models.Model): if key == 'title' and 'seriesTitle' in info: value = info['seriesTitle'] if isinstance(value, list): - value = '\n'.join(value) + '\n' + value = u'\n'.join(value) + '\n' + if isinstance(value, basestring): + value = normalize_value(value) setattr(self, key, value) if self.season < 0: self.season = None