From 12226662f86ce1f2dd26852fc48c523033e146e8 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sun, 27 May 2012 14:21:08 +0000 Subject: [PATCH] smarter places/events update --- pandora/annotation/models.py | 17 +++++--- pandora/annotation/tasks.py | 81 +++++++++++++++++------------------- pandora/event/models.py | 27 ++++++++---- pandora/place/models.py | 32 ++++++++------ 4 files changed, 87 insertions(+), 70 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index d9a2af38..da2a49a5 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -18,10 +18,10 @@ from changelog.models import Changelog from item.utils import sort_string import managers import utils -from tasks import update_matching_events, update_matching_places, update_item +from tasks import update_matches, update_matches, update_item -def get_matches(obj, model, layer_type): +def get_super_matches(obj, model): super_matches = [] q = Q(name_find__contains=" " + obj.name)|Q(name_find__contains="|%s"%obj.name) for name in obj.alternativeNames: @@ -31,7 +31,10 @@ def get_matches(obj, model, layer_type): for name in [obj.name] + list(obj.alternativeNames): if name in othername: super_matches.append(othername) + return super_matches +def get_matches(obj, model, layer_type, qs=None): + super_matches = obj.get_super_matches() exact = [l['id'] for l in filter(lambda l: l['type'] == layer_type, settings.CONFIG['layers'])] if exact: q = Q(value__iexact=obj.name) @@ -56,7 +59,9 @@ def get_matches(obj, model, layer_type): f = contains_matches matches = [] - for a in Annotation.objects.filter(f): + if not qs: + qs = Annotation.objects.all() + for a in qs.filter(f): value = a.findvalue.lower() for name in super_matches: name = ox.decode_html(name) @@ -130,7 +135,7 @@ class Annotation(models.Model): layer = self.get_layer() if self.value: self.value = utils.cleanup_value(self.value, layer['type']) - self.findvalue = 
ox.decode_html(ox.strip_tags(self.value).strip()).replace('\n', ' ') + self.findvalue = ox.decode_html(ox.strip_tags(re.sub('
', ' ', self.value))).replace('\n', ' ') sortvalue = sort_string(self.findvalue) if sortvalue: self.sortvalue = sortvalue[:900] @@ -161,9 +166,9 @@ class Annotation(models.Model): #editAnnotations needs to be in snyc if layer.get('type') == 'place' or layer.get('hasPlaces'): - update_matching_places(self.id) + update_matches(self.id, 'place') if layer.get('type') == 'event' or layer.get('hasEvents'): - update_matching_events(self.id) + update_matches(self.id, 'event') def delete(self, *args, **kwargs): super(Annotation, self).delete(*args, **kwargs) diff --git a/pandora/annotation/tasks.py b/pandora/annotation/tasks.py index 41a0368e..84fa5659 100644 --- a/pandora/annotation/tasks.py +++ b/pandora/annotation/tasks.py @@ -10,63 +10,58 @@ import models @task(ignore_results=True, queue='default') -def update_matching_events(id): - from event.models import Event +def update_matches(id, type): + if type == 'place': + from place.models import Place as Model + elif type == 'event': + from event.models import Event as Model + a = models.Annotation.objects.get(pk=id) - for e in a.events.filter(defined=False).exclude(name=a.value): - if e.annotations.exclude(id=id).count() == 0: - e.delete() - for e in a.events.all(): - e.update_matches() - if a.get_layer().get('type') == 'event' and a.events.count() == 0: - a.events.add(Event.get_or_create(a.value)) - for e in a.events.all(): - e.update_matches() + a_matches = getattr(a, type == 'place' and 'places' or 'events') - if a.findvalue: - names = {} - for n in Event.objects.all().values('id', 'name', 'alternativeNames'): - names[n['id']] = [ox.decode_html(x) for x in [n['name']] + json.loads(n['alternativeNames'])] - - value = a.findvalue.lower() - update = [] - for i in names: - for name in names[i]: - if name.lower() in value: - update.append(i) - break - if update: - for e in Event.objects.filter(id__in=update): - e.update_matches() - -@task(ignore_results=True, queue='default') -def update_matching_places(id): - from 
place.models import Place - a = models.Annotation.objects.get(pk=id) - for p in a.places.filter(defined=False).exclude(name=a.value): + #remove undefined matches that only have this annotation + for p in a_matches.filter(defined=False).exclude(name=a.value): if p.annotations.exclude(id=id).count() == 0: p.delete() - for p in a.places.all(): - p.update_matches() - if a.get_layer().get('type') == 'place' and a.places.count() == 0: - a.places.add(Place.get_or_create(a.value)) - for p in a.places.all(): + if a.get_layer().get('type') == type and a_matches.count() == 0: + a.places.add(Model.get_or_create(a.value)) + for p in a_matches.all(): p.update_matches() if a.findvalue: names = {} - for n in Place.objects.all().values('id', 'name', 'alternativeNames'): - names[n['id']] = [ox.decode_html(x) for x in [n['name']] + json.loads(n['alternativeNames'])] + for n in Model.objects.all().values('id', 'name', 'alternativeNames'): + names[n['id']] = [ox.decode_html(x) + for x in [n['name']] + json.loads(n['alternativeNames'])] value = a.findvalue.lower() - update = [] + + current = [p.id for p in a_matches.all()] + matches = [] + name_matches = [] for i in names: for name in names[i]: if name.lower() in value: - update.append(i) + matches.append(i) + name_matches.append(name.lower()) break + new = [] + for i in matches: + p = Model.objects.get(pk=i) + #only add places/events that did not get added as a super match + #i.e. 
only add The Paris Region and not Paris + if not filter(lambda n: n in name_matches, + [n.lower() for n in p.get_super_matches()]): + new.append(i) + removed = filter(lambda p: p not in new, current) + added = filter(lambda p: p not in current, new) + update = removed + added if update: - for e in Place.objects.filter(id__in=update): - e.update_matches() + for e in Model.objects.filter(id__in=update): + e.update_matches(models.Annotation.objects.filter(pk=a.id)) + else: + #annotation has no value, remove all existing matches + for e in a_matches.all(): + e.update_matches(models.Annotation.objects.filter(pk=a.id)) @task(ignore_results=True, queue='default') def update_item(id): diff --git a/pandora/event/models.py b/pandora/event/models.py index 513266da..a464a404 100644 --- a/pandora/event/models.py +++ b/pandora/event/models.py @@ -11,7 +11,7 @@ from django.conf import settings import ox from ox.django import fields -from annotation.models import Annotation, get_matches +from annotation.models import Annotation, get_matches, get_super_matches from item.models import Item from item import utils from person.models import get_name_sort @@ -86,27 +86,36 @@ class Event(models.Model): def get_matches(self): return get_matches(self, Event, 'event') + def get_super_matches(self): + return get_super_matches(self, Event) + @transaction.commit_on_success - def update_matches(self): - matches = self.get_matches() - numberofmatches = matches.count() - for a in self.annotations.exclude(id__in=matches): + def update_matches(self, annotations=None): + matches = self.get_matches(annotations) + if not annotations: + numberofmatches = matches.count() + annotations = self.annotations.all() + else: + numberofmatches = -1 + for a in annotations.exclude(id__in=matches): self.annotations.remove(a) #annotations of type event always need an event if a.get_layer().get('type') == 'event' and a.events.count() == 0: a.events.add(Event.get_or_create(a.value)) for e in a.events.all(): 
e.update_matches() - for i in matches.exclude(id__in=self.annotations.all()): + for a in matches.exclude(id__in=self.annotations.all()): #need to check again since editEvent might have been called again - if self.annotations.filter(id=i.id).count() == 0: - self.annotations.add(i) - ids = list(set([a.item.id for a in matches])) + if self.annotations.filter(id=a.id).count() == 0: + self.annotations.add(a) + ids = list(set([a['item_id'] for a in self.annotations.all().values('item_id')])) for i in self.items.exclude(id__in=ids): self.items.remove(i) for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()): if self.items.filter(id=i.id).count() == 0: self.items.add(i) + if numberofmatches < 0: + numberofmatches = self.annotations.all().count() if self.matches != numberofmatches: self.matches = numberofmatches if numberofmatches: diff --git a/pandora/place/models.py b/pandora/place/models.py index 038f343b..fd3156ae 100644 --- a/pandora/place/models.py +++ b/pandora/place/models.py @@ -12,8 +12,7 @@ import ox from ox.django import fields import managers -from annotation.models import Annotation, get_matches -from annotation.tasks import update_matching_places +from annotation.models import Annotation, get_matches, get_super_matches from item.models import Item from changelog.models import Changelog @@ -96,30 +95,39 @@ class Place(models.Model): j[key] = getattr(self, key) return j - def get_matches(self): - return get_matches(self, Place, 'place') + def get_matches(self, qs=None): + return get_matches(self, Place, 'place', qs) + + def get_super_matches(self): + return get_super_matches(self, Place) @transaction.commit_on_success - def update_matches(self): - matches = self.get_matches() - numberofmatches = matches.count() - for a in self.annotations.exclude(id__in=matches): + def update_matches(self, annotations=None): + matches = self.get_matches(annotations) + if not annotations: + numberofmatches = matches.count() + annotations = 
self.annotations.all() + else: + numberofmatches = -1 + for a in annotations.exclude(id__in=matches): self.annotations.remove(a) #annotations of type place always need a place if a.get_layer().get('type') == 'place' and a.places.count() == 0: a.places.add(Place.get_or_create(a.value)) for p in a.places.all(): p.update_matches() - for i in matches.exclude(id__in=self.annotations.all()): + for a in matches.exclude(id__in=self.annotations.all()): #need to check again since editEvent might have been called again - if self.annotations.filter(id=i.id).count() == 0: - self.annotations.add(i) - ids = list(set([a.item.id for a in matches])) + if self.annotations.filter(id=a.id).count() == 0: + self.annotations.add(a) + ids = list(set([a['item_id'] for a in self.annotations.all().values('item_id')])) for i in self.items.exclude(id__in=ids): self.items.remove(i) for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()): if self.items.filter(id=i.id).count() == 0: self.items.add(i) + if numberofmatches < 0: + numberofmatches = self.annotations.all().count() if self.matches != numberofmatches: self.matches = numberofmatches if numberofmatches: