normalize imdb values to make find more reliable for unicode titles, fixes #930

This commit is contained in:
j 2012-08-26 16:22:18 +02:00
parent 5399c467c3
commit 5482248fee

View file

@ -2,10 +2,15 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
import unicodedata
from django.db import models from django.db import models
import ox import ox
def normalize_value(value):
    """Return *value* as NFD-normalized unicode text.

    Byte strings are decoded as UTF-8 first, then the text is put into
    Unicode canonical decomposition form (NFD), so that composed and
    decomposed spellings of the same title compare equal.

    Values that are neither byte strings nor text (e.g. numbers or
    ``None``) are returned unchanged instead of raising ``TypeError``,
    so callers that pass arbitrary field values do not need their own
    type guard.

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, and the real byte
    # type on Python 3, so this check is correct on both.
    if isinstance(value, bytes):
        value = value.decode('utf-8')
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    if isinstance(value, type(u'')):
        return unicodedata.normalize('NFD', value)
    return value
def find(info, guess=True): def find(info, guess=True):
q = Imdb.objects.all() q = Imdb.objects.all()
@ -26,9 +31,10 @@ def find(info, guess=True):
fkey = key fkey = key
if isinstance(info[key], list): if isinstance(info[key], list):
fkey = '%s__iexact'%key fkey = '%s__iexact'%key
q = q.filter(**{fkey: '\n'.join(info[key]) + '\n'}) value = normalize_value(u'\n'.join(info[key]) + '\n')
else: else:
q = q.filter(**{fkey:info[key]}) value = normalize_value(info[key])
q = q.filter(**{fkey:value})
if q.count() == 1: if q.count() == 1:
m = q[0] m = q[0]
m.update() m.update()
@ -75,7 +81,9 @@ class Imdb(models.Model):
if key == 'title' and 'seriesTitle' in info: if key == 'title' and 'seriesTitle' in info:
value = info['seriesTitle'] value = info['seriesTitle']
if isinstance(value, list): if isinstance(value, list):
value = '\n'.join(value) + '\n' value = u'\n'.join(value) + '\n'
if isinstance(value, basestring):
value = normalize_value(value)
setattr(self, key, value) setattr(self, key, value)
if self.season < 0: if self.season < 0:
self.season = None self.season = None