diff --git a/README b/README index f9035da..d0bf779 100644 --- a/README +++ b/README @@ -2,8 +2,8 @@ To setup a local instance of oxdata: bzr branch http://code.0xdb.org/oxdata oxdata cd oxdata - virtualenv . - pip -E . install -r requirements.txt + virtualenv --system-site-packages . + ./bin/pip install -r requirements.txt you need python, bazaar, pip and virtualenv: diff --git a/oxdata/movie/models.py b/oxdata/movie/models.py index a00904d..8bd3926 100644 --- a/oxdata/movie/models.py +++ b/oxdata/movie/models.py @@ -5,6 +5,8 @@ from __future__ import division import re import unicodedata from urllib import quote +import hashlib +import base64 from django.db import models from django.conf import settings @@ -74,11 +76,15 @@ class Imdb(models.Model): seriesTitle = models.CharField(max_length=1000, blank=True, default='') invalid = models.BooleanField(default=False) - patch = DictField(default=None, blank=True) + patch = DictField(default=None, blank=True, null=True) def __unicode__(self): return u"[%s] %s%s" % (self.imdb, self.title, self.year and ' (%s)' % self.year or '') + def save(self, *args, **kwargs): + super(Imdb, self).save(*args, **kwargs) + Match.update_item(self) + keys = ('title', 'director', 'year', 'season', 'episode', 'originalTitle', 'seriesTitle', 'episodeTitle', 'episodeYear', 'episodeDirector') @@ -212,3 +218,78 @@ def get_new_ids(timeout=-1): added += 1 if added: print url, added + +class Match(models.Model): + keys = [ + ['title', 'director', 'year'], + ['title', 'director'], + ['title', 'year'], + ['director', 'year'], + ['title'], + ['director'] + ] + + key = models.CharField(max_length=28, db_index=True) + item = models.ForeignKey(Imdb, related_name='matches') + + def __unicode__(self): + return '%s(%s)' % (self.hexdigest(), self.item.imdb) + + def json(self): + return self.item.json() + + def hexdigest(self): + return base64.b64decode(self.key).encode('hex') + + @classmethod + def get_keys(cls, data): + data = { + 'title': data['title'].lower(), + 'year': str(data.get('year', '')), + 'director': ';'.join(sorted(data.get('director', []))) + } + keys = [] + for k in cls.keys: + key = '\0'.join(k) + value = '\0'.join([data[v] for v in k]) + if isinstance(value, unicode): + value = value.encode('utf-8') + value = str(value) + key = str(key) + key = base64.b64encode(hashlib.sha1(key + '\n' + value).digest()) + keys.append(key) + return keys + + @classmethod + def find(cls, data): + matches = [] + items = [] + for key in cls.get_keys(data): + for m in cls.objects.filter(key=key).order_by('item__imdb'): + if m.item.id not in items: + matches.append(m.json()) + items.append(m.item.id) + return matches + + @classmethod + def update_item(cls, item): + info = item.json() + data = [] + if 'originalTitle' in info: + data.append({ + 'title': info['originalTitle'], + 'year': info.get('year', ''), + 'director': info.get('director', []) + }) + data.append(info) + existing_keys = [m.key for m in Match.objects.filter(item=item)] + current_keys = [] + for d in data: + for key in cls.get_keys(d): + if key not in existing_keys: + m = Match(key=key, item=item) + m.save() + current_keys.append(key) + deleted_keys = list(set(existing_keys)-set(current_keys)) + if deleted_keys: + Match.objects.filter(item=item, key__in=deleted_keys).delete() diff --git a/oxdata/movie/views.py b/oxdata/movie/views.py index e0b4168..3e9754a 100644 --- a/oxdata/movie/views.py +++ b/oxdata/movie/views.py @@ -22,6 +22,13 @@ def getId(request): return render_to_json_response(response) actions.register(getId) +def getIds(request): + data = json.loads(request.POST['data']) + response = json_response() + response['items'] = models.Match.find(data) + return render_to_json_response(response) +actions.register(getIds) + def getData(request): response = json_response()