pandora/pandora/entity/models.py

285 lines
9.9 KiB
Python
Raw Normal View History

2014-11-18 18:16:28 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
import os
import re
from glob import glob
from urllib import quote, unquote
import unicodedata
2014-11-18 18:16:28 +00:00
from django.db import models, transaction
2014-11-18 18:16:28 +00:00
from django.db.models import Max
from django.contrib.auth.models import User
from django.db.models.signals import pre_delete, post_init
2014-11-20 13:20:48 +00:00
from django.conf import settings
2014-11-18 18:16:28 +00:00
import ox
2016-02-20 09:06:41 +00:00
from oxdjango import fields
2014-11-18 18:16:28 +00:00
from person.models import get_name_sort
from item.utils import get_by_id
2015-02-13 11:06:09 +00:00
from document.models import Document
2014-11-18 18:16:28 +00:00
import managers
class Entity(models.Model):
    """A named, typed entity that can be referenced from annotations,
    items, texts and documents.

    ``name_find`` holds the lowercased name and all alternative names in the
    form ``||name||alt1||alt2||`` so that a ``contains`` lookup for
    ``|<name>|`` only matches a complete name.
    """

    class Meta:
        unique_together = ("type", "name")

    user = models.ForeignKey(User, related_name='entities', null=True, default=None)

    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    type = models.CharField(max_length=255)
    name = models.CharField(max_length=255)
    alternativeNames = fields.TupleField(default=())

    # Free-form, type specific values; written by edit(), read by json()
    # and update_find().
    data = fields.DictField(default={}, editable=False)
    # Cached number of references, maintained by update_matches().
    matches = models.IntegerField(default=0)

    objects = managers.EntityManager()

    # Both columns are derived from name/alternativeNames in save().
    name_sort = models.CharField(max_length=255, null=True)
    name_find = models.TextField(default='', editable=True)

    documents = models.ManyToManyField(Document, through='DocumentProperties', related_name='entities')

    def save(self, *args, **kwargs):
        """Recompute ``name_sort`` and ``name_find``, save the row and then
        refresh the match count, dependent annotations and the find table.
        """
        # get_by_id may return a falsy value for a type that is not configured
        # (update_find guards against that too); fall back to an empty config
        # instead of failing on ``None.get``.
        entity = get_by_id(settings.CONFIG['entities'], self.type) or {}
        if entity.get('sortType') == 'person' and self.name:
            # Python 2: byte strings are decoded before computing the sort name.
            if isinstance(self.name, str):
                self.name = self.name.decode('utf-8')
            self.name_sort = get_name_sort(self.name)[:255].lower()
        else:
            self.name_sort = ox.sort_string(self.name or u'')[:255].lower() or None
        # tuple() keeps the concatenation valid if alternativeNames is a list.
        all_names = (self.name,) + tuple(self.alternativeNames)
        self.name_find = ('||' + '||'.join(all_names) + '||').lower()
        super(Entity, self).save(*args, **kwargs)
        self.update_matches()
        self.update_annotations()
        self.update_find()

    def __unicode__(self):
        return self.get_id()

    @classmethod
    def get(cls, id):
        """Return the entity whose primary key is ``ox.fromAZ(id)``."""
        return cls.objects.get(pk=ox.fromAZ(id))

    @classmethod
    def get_by_name(cls, name, type):
        """Return the entity of ``type`` that has ``name`` as its name or as
        one of its alternative names (case-insensitive).
        """
        return cls.objects.get(name_find__contains=u'|%s|' % name.lower(), type=type)

    @classmethod
    def get_or_create(cls, name):
        """Return the first entity matching ``name`` (name or alternative
        name, case-insensitive); create and save a new one if none exists.
        """
        qs = cls.objects.filter(name_find__contains=u'|%s|' % name.lower())
        if qs.exists():
            return qs[0]
        instance = cls(name=name)
        instance.save()
        return instance

    def get_absolute_url(self):
        """Return the ``/entities/<id>`` URL with the id percent-quoted,
        except that ``:`` is kept literal.
        """
        return ('/entities/%s' % quote(self.get_id())).replace('%3A', ':')

    def get_id(self):
        """Return the public id, i.e. ``ox.toAZ`` of the primary key."""
        return ox.toAZ(self.id)

    def add(self, document):
        """Attach ``document`` to this entity.

        A newly created link is given the next free ``index`` and the
        document's matches are updated; an existing link is left untouched.
        """
        p, created = DocumentProperties.objects.get_or_create(document=document, entity=self)
        if created:
            # The new row (index 0 by default) is part of the aggregate, so
            # the maximum is never None here.
            siblings = DocumentProperties.objects.filter(entity=self)
            p.index = siblings.aggregate(Max('index'))['index__max'] + 1
            p.save()
            p.document.update_matches()

    def remove(self, document):
        """Detach ``document`` from this entity."""
        DocumentProperties.objects.filter(document=document, entity=self).delete()

    def editable(self, user, item=None):
        """Return True if ``user`` may edit this entity.

        Staff, users with the ``canEditEntities`` capability and users that
        may edit the optional ``item`` qualify; anonymous users never do.
        """
        if not user or user.is_anonymous():
            return False
        if user.is_staff or \
                user.profile.capability('canEditEntities') == True or \
                (item and item.editable(user)):
            return True
        return False

    def _name_in_use(self, name):
        """Return True if another entity has ``name`` as name or alternative name."""
        return Entity.objects.filter(
            name_find__contains=u'|%s|' % name.lower()
        ).exclude(id=self.id).exists()

    def edit(self, data):
        """Apply the values in ``data`` to this entity without saving it.

        ``name`` and ``alternativeNames`` are made unique by appending
        `` [n]``; ``type`` is assigned directly; every other key is stored in
        ``self.data`` (strings are passed through ``ox.sanitize_html``).
        """
        # Handle 'name' first so that alternative names are de-duplicated
        # against the new name regardless of dict iteration order.
        for key in sorted(data, key=lambda k: k != 'name'):
            if key == 'name':
                # Drop a trailing " [n]" left over from an earlier rename.
                data['name'] = re.sub(r' \[\d+\]$', '', data['name']).strip()
                if not data['name']:
                    data['name'] = "Unnamed"
                name = data['name']
                n = 1
                while self._name_in_use(name):
                    n += 1
                    name = data['name'] + ' [%d]' % n
                self.name = name
            elif key == 'type':
                self.type = data[key]
            elif key == 'alternativeNames':
                # used_names holds lowercased names, so lookups must be
                # lowercased as well.
                used_names = [self.name.lower()]
                names = []
                for v in data[key]:
                    name = ox.decode_html(v)
                    name = re.sub(r' \[\d+\]$', '', name).strip()
                    name_ = name
                    n = 1
                    while name.lower() in used_names or self._name_in_use(name):
                        n += 1
                        name = name_ + ' [%d]' % n
                    names.append(name)
                    used_names.append(name.lower())
                self.alternativeNames = tuple(ox.escape_html(n) for n in names)
            else:
                # FIXME: more data validation
                if isinstance(data[key], basestring):
                    self.data[key] = ox.sanitize_html(data[key])
                else:
                    self.data[key] = data[key]

    def json(self, keys=None, user=None):
        """Return a dict with the requested ``keys``.

        Without ``keys`` the built-in keys plus all keys of ``self.data`` are
        returned. Keys that are neither built-in nor in ``self.data`` are
        omitted. ``user`` is only used to compute ``editable``.
        """
        if not keys:
            keys = [
                'alternativeNames',
                'editable',
                'id',
                'name',
                'sortName',
                'type',
                'user',
                'documents',
            ] + list(self.data)
        response = {}
        for key in keys:
            if key == 'id':
                response[key] = self.get_id()
            elif key == 'editable':
                response[key] = self.editable(user)
            elif key == 'user':
                response[key] = self.user and self.user.username
            elif key in ('name', 'alternativeNames', 'type'):
                response[key] = getattr(self, key)
            elif key == 'sortName':
                response[key] = self.name_sort
            elif key == 'documents':
                # Document ids in link order.
                response[key] = [ox.toAZ(id_)
                    for id_, in self.documentproperties.order_by('index').values_list('document_id')]
            elif key in self.data:
                response[key] = self.data[key]
        return response

    def annotation_value(self):
        """Return the HTML-escaped name."""
        #return u'<a href="/entities/%s">%s</a>' % (self.get_id(), ox.escape_html(self.name))
        return ox.escape_html(self.name)

    def update_find(self):
        """Rebuild the ``Find`` rows for this entity from its name,
        alternative names and the keys configured for its type.

        Does nothing if the type is not configured.
        """

        def save(key, value):
            # Empty values remove the row, everything else is stored
            # (strings are tag-stripped, HTML-decoded, NFKD-normalized and
            # lowercased first).
            if value not in ('', None):
                f, created = Find.objects.get_or_create(entity=self, key=key)
                if isinstance(value, bool):
                    value = 'true' if value else 'false'
                if isinstance(value, basestring):
                    value = ox.decode_html(ox.strip_tags(value.strip()))
                    value = unicodedata.normalize('NFKD', value).lower()
                f.value = value
                f.save()
            else:
                Find.objects.filter(entity=self, key=key).delete()

        entity = get_by_id(settings.CONFIG['entities'], self.type)
        if not entity:
            return
        with transaction.atomic():
            ids = ['name']
            for key in entity['keys']:
                value = self.data.get(key['id'])
                if isinstance(value, list):
                    value = u'\n'.join(value)
                save(key['id'], value)
                ids.append(key['id'])
            save('name', u'\n'.join([self.name] + list(self.alternativeNames)))
            # Drop rows for keys that are no longer configured.
            self.find.exclude(key__in=ids).delete()

    def update_matches(self):
        """Recount references to this entity and store the result in
        ``matches`` if it changed.

        Counts entity-layer annotations whose value is this entity's id plus
        annotations, items and texts that contain the entity URL (quoted and,
        if different, unquoted).
        """
        # Imported here rather than at module level (presumably to avoid
        # circular imports - confirm before moving).
        import annotation.models
        import item.models
        import text.models
        urls = [self.get_absolute_url()]
        url = unquote(urls[0])
        if url != urls[0]:
            urls.append(url)
        entity_layers = [l['id'] for l in settings.CONFIG['layers'] if l['type'] == 'entity']
        if entity_layers:
            matches = annotation.models.Annotation.objects.filter(layer__in=entity_layers, value=self.get_id()).count()
        else:
            matches = 0
        for url in urls:
            matches += annotation.models.Annotation.objects.filter(value__contains=url).count()
            matches += item.models.Item.objects.filter(data__contains=url).count()
            matches += text.models.Text.objects.filter(text__contains=url).count()
        if matches != self.matches:
            # update() writes the column without going through save() again.
            Entity.objects.filter(id=self.id).update(matches=matches)
            self.matches = matches

    def update_annotations(self):
        """Queue an annotation update for the entity layers if the name
        differs from the one the instance was initialised with.
        """
        import annotation.models
        import annotation.tasks

        if self.name == self._original_name:
            return

        entity_layers = [l['id'] for l in settings.CONFIG['layers'] if l['type'] == 'entity']
        if entity_layers:
            annotation.tasks.update_annotations.delay(entity_layers, self.get_id())
2015-02-13 11:06:09 +00:00
def entity_post_init(sender, instance, **kwargs):
    """``post_init`` receiver: snapshot the name the instance starts with.

    ``Entity.update_annotations`` compares the current name against
    ``_original_name`` to decide whether anything has to be updated.
    """
    setattr(instance, '_original_name', instance.name)


# Run the receiver for every Entity instance that gets initialised.
post_init.connect(entity_post_init, sender=Entity)
2015-02-13 11:06:09 +00:00
class DocumentProperties(models.Model):
    """Link between an Entity and a Document (the ``through`` model of
    ``Entity.documents``); each pair may exist only once.
    """

    class Meta:
        unique_together = ("entity", "document")

    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    document = models.ForeignKey(Document, related_name='documentproperties')
    entity = models.ForeignKey(Entity, related_name='documentproperties')
    # Position of the document within the entity's document list.
    index = models.IntegerField(default=0)
    data = fields.DictField(default={})

    def __unicode__(self):
        pair = (self.document, self.entity)
        return u"%r-%r" % pair

    def save(self, *args, **kwargs):
        # No extra behaviour; delegates straight to the default save.
        super(DocumentProperties, self).save(*args, **kwargs)
class Find(models.Model):
    """One searchable ``key``/``value`` pair of an Entity; the rows are
    written by ``Entity.update_find``.
    """

    class Meta:
        unique_together = ("entity", "key")

    entity = models.ForeignKey('Entity', related_name='find', db_index=True)
    key = models.CharField(max_length=200, db_index=True)
    # Whether the value column is indexed is controlled by settings.DB_GIN_TRGM.
    value = models.TextField(blank=True, db_index=settings.DB_GIN_TRGM)

    def __unicode__(self):
        parts = (self.key, self.value)
        return u"%s=%s" % parts