smarter places/events update

This commit is contained in:
j 2012-05-27 14:21:08 +00:00
parent 59f575d489
commit 12226662f8
4 changed files with 87 additions and 70 deletions

View file

@ -18,10 +18,10 @@ from changelog.models import Changelog
from item.utils import sort_string from item.utils import sort_string
import managers import managers
import utils import utils
from tasks import update_matching_events, update_matching_places, update_item from tasks import update_matches, update_matches, update_item
def get_matches(obj, model, layer_type): def get_super_matches(obj, model):
super_matches = [] super_matches = []
q = Q(name_find__contains=" " + obj.name)|Q(name_find__contains="|%s"%obj.name) q = Q(name_find__contains=" " + obj.name)|Q(name_find__contains="|%s"%obj.name)
for name in obj.alternativeNames: for name in obj.alternativeNames:
@ -31,7 +31,10 @@ def get_matches(obj, model, layer_type):
for name in [obj.name] + list(obj.alternativeNames): for name in [obj.name] + list(obj.alternativeNames):
if name in othername: if name in othername:
super_matches.append(othername) super_matches.append(othername)
return super_matches
def get_matches(obj, model, layer_type, qs=None):
super_matches = obj.get_super_matches()
exact = [l['id'] for l in filter(lambda l: l['type'] == layer_type, settings.CONFIG['layers'])] exact = [l['id'] for l in filter(lambda l: l['type'] == layer_type, settings.CONFIG['layers'])]
if exact: if exact:
q = Q(value__iexact=obj.name) q = Q(value__iexact=obj.name)
@ -56,7 +59,9 @@ def get_matches(obj, model, layer_type):
f = contains_matches f = contains_matches
matches = [] matches = []
for a in Annotation.objects.filter(f): if not qs:
qs = Annotation.objects.all()
for a in qs.filter(f):
value = a.findvalue.lower() value = a.findvalue.lower()
for name in super_matches: for name in super_matches:
name = ox.decode_html(name) name = ox.decode_html(name)
@ -130,7 +135,7 @@ class Annotation(models.Model):
layer = self.get_layer() layer = self.get_layer()
if self.value: if self.value:
self.value = utils.cleanup_value(self.value, layer['type']) self.value = utils.cleanup_value(self.value, layer['type'])
self.findvalue = ox.decode_html(ox.strip_tags(self.value).strip()).replace('\n', ' ') self.findvalue = ox.decode_html(ox.strip_tags(re.sub('<br */?>', ' ', self.value))).replace('\n', ' ')
sortvalue = sort_string(self.findvalue) sortvalue = sort_string(self.findvalue)
if sortvalue: if sortvalue:
self.sortvalue = sortvalue[:900] self.sortvalue = sortvalue[:900]
@ -161,9 +166,9 @@ class Annotation(models.Model):
#editAnnotations needs to be in snyc #editAnnotations needs to be in snyc
if layer.get('type') == 'place' or layer.get('hasPlaces'): if layer.get('type') == 'place' or layer.get('hasPlaces'):
update_matching_places(self.id) update_matches(self.id, 'place')
if layer.get('type') == 'event' or layer.get('hasEvents'): if layer.get('type') == 'event' or layer.get('hasEvents'):
update_matching_events(self.id) update_matches(self.id, 'event')
def delete(self, *args, **kwargs): def delete(self, *args, **kwargs):
super(Annotation, self).delete(*args, **kwargs) super(Annotation, self).delete(*args, **kwargs)

View file

@ -10,63 +10,58 @@ import models
@task(ignore_results=True, queue='default')
def update_matches(id, type):
    """Re-sync the places or events linked to a single annotation.

    id   -- primary key of the Annotation to process
    type -- 'place' or 'event'; selects both the model (Place/Event) and
            the relation on the annotation (a.places / a.events)

    Raises ValueError if type is neither 'place' nor 'event'.
    """
    # models are imported lazily inside the task, as in the original code
    if type == 'place':
        from place.models import Place as Model
    elif type == 'event':
        from event.models import Event as Model
    else:
        # fail early instead of hitting an unbound Model further down
        raise ValueError('unknown match type: %r' % (type,))

    a = models.Annotation.objects.get(pk=id)
    a_matches = a.places if type == 'place' else a.events

    # remove undefined matches that only have this annotation
    for p in a_matches.filter(defined=False).exclude(name=a.value):
        if p.annotations.exclude(id=id).count() == 0:
            p.delete()

    # an annotation on a layer of this type always needs at least one match;
    # add it through the relation for *this* type (not always a.places)
    if a.get_layer().get('type') == type and a_matches.count() == 0:
        a_matches.add(Model.get_or_create(a.value))
        for p in a_matches.all():
            p.update_matches()

    if a.findvalue:
        value = a.findvalue.lower()

        # ids of all places/events with a name or alternative name that
        # occurs in the annotation text, plus the (lowercased) name that hit
        matches = []
        name_matches = set()
        for n in Model.objects.all().values('id', 'name', 'alternativeNames'):
            for name in [n['name']] + json.loads(n['alternativeNames']):
                name = ox.decode_html(name).lower()
                if name in value:
                    matches.append(n['id'])
                    name_matches.add(name)
                    break  # one hit per object is enough

        # only keep places/events that did not get added as a super match,
        # i.e. only keep "The Paris Region" and not "Paris"
        new = set()
        for p in Model.objects.filter(id__in=matches):
            if not any(n.lower() in name_matches
                       for n in p.get_super_matches()):
                new.add(p.id)

        current = set(p.id for p in a_matches.all())
        # objects that have to drop or gain this annotation
        update = current ^ new
        if update:
            for e in Model.objects.filter(id__in=list(update)):
                e.update_matches(models.Annotation.objects.filter(pk=a.id))
    else:
        # annotation has no value, remove all existing matches
        for e in a_matches.all():
            e.update_matches(models.Annotation.objects.filter(pk=a.id))
@task(ignore_results=True, queue='default') @task(ignore_results=True, queue='default')
def update_item(id): def update_item(id):

View file

@ -11,7 +11,7 @@ from django.conf import settings
import ox import ox
from ox.django import fields from ox.django import fields
from annotation.models import Annotation, get_matches from annotation.models import Annotation, get_matches, get_super_matches
from item.models import Item from item.models import Item
from item import utils from item import utils
from person.models import get_name_sort from person.models import get_name_sort
@ -86,27 +86,36 @@ class Event(models.Model):
def get_matches(self, qs=None):
    """Return the annotations matching this event.

    qs -- optional Annotation queryset to restrict the search to; it is
          forwarded to the module-level get_matches(), and update_matches()
          passes its `annotations` argument through here.
    """
    return get_matches(self, Event, 'event', qs)
def get_super_matches(self):
    """Return the super matches of this event.

    Delegates to the module-level get_super_matches() with Event as the
    model; presumably the names of other events that contain one of this
    event's names -- verify against annotation.models.
    """
    return get_super_matches(obj=self, model=Event)
@transaction.commit_on_success @transaction.commit_on_success
def update_matches(self): def update_matches(self, annotations=None):
matches = self.get_matches() matches = self.get_matches(annotations)
if not annotations:
numberofmatches = matches.count() numberofmatches = matches.count()
for a in self.annotations.exclude(id__in=matches): annotations = self.annotations.all()
else:
numberofmatches = -1
for a in annotations.exclude(id__in=matches):
self.annotations.remove(a) self.annotations.remove(a)
#annotations of type event always need an event #annotations of type event always need an event
if a.get_layer().get('type') == 'event' and a.events.count() == 0: if a.get_layer().get('type') == 'event' and a.events.count() == 0:
a.events.add(Event.get_or_create(a.value)) a.events.add(Event.get_or_create(a.value))
for e in a.events.all(): for e in a.events.all():
e.update_matches() e.update_matches()
for i in matches.exclude(id__in=self.annotations.all()): for a in matches.exclude(id__in=self.annotations.all()):
#need to check again since editEvent might have been called again #need to check again since editEvent might have been called again
if self.annotations.filter(id=i.id).count() == 0: if self.annotations.filter(id=a.id).count() == 0:
self.annotations.add(i) self.annotations.add(a)
ids = list(set([a.item.id for a in matches])) ids = list(set([a['item_id'] for a in self.annotations.all().values('item_id')]))
for i in self.items.exclude(id__in=ids): for i in self.items.exclude(id__in=ids):
self.items.remove(i) self.items.remove(i)
for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()): for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()):
if self.items.filter(id=i.id).count() == 0: if self.items.filter(id=i.id).count() == 0:
self.items.add(i) self.items.add(i)
if numberofmatches < 0:
numberofmatches = self.annotations.all().count()
if self.matches != numberofmatches: if self.matches != numberofmatches:
self.matches = numberofmatches self.matches = numberofmatches
if numberofmatches: if numberofmatches:

View file

@ -12,8 +12,7 @@ import ox
from ox.django import fields from ox.django import fields
import managers import managers
from annotation.models import Annotation, get_matches from annotation.models import Annotation, get_matches, get_super_matches
from annotation.tasks import update_matching_places
from item.models import Item from item.models import Item
from changelog.models import Changelog from changelog.models import Changelog
@ -96,30 +95,39 @@ class Place(models.Model):
j[key] = getattr(self, key) j[key] = getattr(self, key)
return j return j
def get_matches(self, qs=None):
    """Return the annotations matching this place.

    qs -- optional Annotation queryset to restrict the search to; forwarded
          unchanged to the module-level get_matches().
    """
    return get_matches(self, Place, 'place', qs=qs)
def get_super_matches(self):
    """Return the super matches of this place.

    Delegates to the module-level get_super_matches() with Place as the
    model; presumably the names of other places that contain one of this
    place's names -- verify against annotation.models.
    """
    return get_super_matches(obj=self, model=Place)
@transaction.commit_on_success @transaction.commit_on_success
def update_matches(self): def update_matches(self, annotations=None):
matches = self.get_matches() matches = self.get_matches(annotations)
if not annotations:
numberofmatches = matches.count() numberofmatches = matches.count()
for a in self.annotations.exclude(id__in=matches): annotations = self.annotations.all()
else:
numberofmatches = -1
for a in annotations.exclude(id__in=matches):
self.annotations.remove(a) self.annotations.remove(a)
#annotations of type place always need a place #annotations of type place always need a place
if a.get_layer().get('type') == 'place' and a.places.count() == 0: if a.get_layer().get('type') == 'place' and a.places.count() == 0:
a.places.add(Place.get_or_create(a.value)) a.places.add(Place.get_or_create(a.value))
for p in a.places.all(): for p in a.places.all():
p.update_matches() p.update_matches()
for i in matches.exclude(id__in=self.annotations.all()): for a in matches.exclude(id__in=self.annotations.all()):
#need to check again since editEvent might have been called again #need to check again since editEvent might have been called again
if self.annotations.filter(id=i.id).count() == 0: if self.annotations.filter(id=a.id).count() == 0:
self.annotations.add(i) self.annotations.add(a)
ids = list(set([a.item.id for a in matches])) ids = list(set([a['item_id'] for a in self.annotations.all().values('item_id')]))
for i in self.items.exclude(id__in=ids): for i in self.items.exclude(id__in=ids):
self.items.remove(i) self.items.remove(i)
for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()): for i in Item.objects.filter(id__in=ids).exclude(id__in=self.items.all()):
if self.items.filter(id=i.id).count() == 0: if self.items.filter(id=i.id).count() == 0:
self.items.add(i) self.items.add(i)
if numberofmatches < 0:
numberofmatches = self.annotations.all().count()
if self.matches != numberofmatches: if self.matches != numberofmatches:
self.matches = numberofmatches self.matches = numberofmatches
if numberofmatches: if numberofmatches: