pandora/pandora/annotation/models.py

316 lines
12 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
2012-01-17 08:58:33 +00:00
import re
import unicodedata
from django.db import models, transaction
2012-01-17 08:58:33 +00:00
from django.db.models import Q
from django.contrib.auth.models import User
2011-11-02 14:42:07 +00:00
from django.conf import settings
2012-02-01 15:25:18 +00:00
from django.db.models.signals import pre_delete
2011-01-28 08:43:46 +00:00
import ox
2011-10-02 18:16:28 +00:00
from clip.models import Clip
from item.utils import sort_string, get_by_key
2011-06-17 07:44:45 +00:00
import managers
import utils
from tasks import update_matches
2010-12-28 14:04:28 +00:00
2012-05-27 14:21:08 +00:00
def get_super_matches(obj, model):
    """Return names of other ``model`` rows that contain one of ``obj``'s names.

    Candidate rows are selected by a ``name_find__contains`` lookup for each
    of ``obj``'s names, prefixed with either a space or ``|``; ``obj`` itself
    is excluded by id.  Every name (primary or alternative) of a candidate
    that contains one of ``obj``'s names as a substring is appended to the
    result, once per matching own name, so the list may hold duplicates.

    ``obj`` must provide ``id``, ``name`` and ``alternativeNames``; ``model``
    must be a Django model class whose rows provide the same attributes.
    """
    def _name_query(name):
        # a name counts when it follows a space or a "|" inside name_find
        return Q(name_find__contains=" " + name) | Q(name_find__contains="|%s" % name)

    query = _name_query(obj.name)
    for alternative in obj.alternativeNames:
        query = query | _name_query(alternative)

    own_names = [obj.name] + list(obj.alternativeNames)
    super_matches = []
    for other in model.objects.filter(query).exclude(id=obj.id):
        for other_name in [other.name] + list(other.alternativeNames):
            super_matches.extend(
                other_name for own_name in own_names if own_name in other_name
            )
    return super_matches
2012-01-17 08:58:33 +00:00
2012-05-27 14:21:08 +00:00
def _normalize_find_name(name):
    """Normalize ``name`` for comparison against ``Annotation.findvalue``.

    Mirrors the steps ``Annotation.save`` applies when it fills ``findvalue``:
    decode HTML entities, NFKD-normalize, then lower-case.
    """
    return unicodedata.normalize('NFKD', ox.decode_html(name)).lower()


def get_matches(obj, model, layer_type, qs=None):
    """Return a queryset of the annotations that refer to ``obj``.

    Two kinds of layers from ``settings.CONFIG['layers']`` are searched:

    * layers whose ``type`` equals ``layer_type``: the annotation ``value``
      must equal one of ``obj``'s names (case-insensitive);
    * layers flagged ``has<Layer_type>s`` (e.g. ``hasPlaces``): the
      annotation ``findvalue`` must contain one of the names.

    The database pre-selection is then refined in Python: names returned by
    ``obj.get_super_matches()`` are removed from the text first, so that a
    longer name containing ``obj``'s name does not count as a hit.  If no
    exact-type layer is configured at all, the name must additionally be
    delimited by whitespace/punctuation.

    :param obj: object providing ``name``, ``alternativeNames`` and
        ``get_super_matches()``.
    :param model: unused here; kept for interface compatibility.
    :param layer_type: layer type to match, e.g. ``'place'`` or ``'event'``.
    :param qs: optional ``Annotation`` queryset to restrict the search to;
        ``None`` means all annotations.
    :returns: ``Annotation`` queryset filtered by the matching ids; it is
        filtered on ``id__in=[-1]`` when nothing matched.
    """
    super_matches = obj.get_super_matches()
    names = [obj.name] + list(obj.alternativeNames)
    layers = settings.CONFIG['layers']

    f = None
    exact = [l['id'] for l in layers if l['type'] == layer_type]
    if exact:
        q = Q(value__iexact=names[0])
        for name in names[1:]:
            q = q | Q(value__iexact=name)
        f = q & Q(layer__in=exact)

    has_type = 'has%ss' % layer_type.capitalize()
    contains = [l['id'] for l in layers if l.get(has_type)]
    if contains:
        q = None
        for name in names:
            name = _normalize_find_name(name)
            name_q = Q(findvalue__icontains=" " + name) | Q(findvalue__istartswith=name)
            q = name_q if q is None else q | name_q
        contains_matches = q & Q(layer__in=contains)
        f = contains_matches if f is None else contains_matches | f

    if f is None:
        # No configured layer can reference this type; filtering on None
        # would raise, so return the same "no match" queryset as below.
        return Annotation.objects.filter(id__in=[-1])

    # Compare against None explicitly: truth-testing a queryset executes it,
    # and an *empty* queryset passed by the caller must not silently widen
    # the search to every annotation.
    if qs is None:
        qs = Annotation.objects.all()

    # Loop-invariant work: normalize names and compile patterns once.
    removable = [_normalize_find_name(name) for name in super_matches]
    wanted = []
    for name in names:
        name = _normalize_find_name(name)
        # name preceded by start/whitespace and followed by punctuation,
        # whitespace or end of text
        boundary = re.compile(
            r'((^|\s)%s([\.,;:!?\'"\)\]\-\/\s]|$))' % re.escape(name)
        )
        wanted.append((name, boundary))

    matches = []
    for a in qs.filter(f):
        if not a.findvalue:
            continue
        value = a.findvalue.lower()
        for name in removable:
            value = value.replace(name, '')
        for name, boundary in wanted:
            if name in value and (exact or boundary.search(value)):
                matches.append(a.id)
                break
    if not matches:
        matches = [-1]
    return Annotation.objects.filter(id__in=matches)
2011-02-11 10:21:25 +00:00
2010-11-28 16:03:23 +00:00
class Annotation(models.Model):
    """A timed annotation on an item, stored in one of the configured layers.

    Layers are described by ``settings.CONFIG['layers']``; the layer's
    ``type`` decides how ``value`` is interpreted (plain text, entity
    reference, place, event).
    """
    objects = managers.AnnotationManager()

    # FIXME: an (item, start) index would be good here
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)
    user = models.ForeignKey(User, related_name='annotations')
    item = models.ForeignKey('item.Item', related_name='annotations')
    clip = models.ForeignKey('clip.Clip', null=True, related_name='annotations')

    # identifier used by get() and exposed as 'id' in json()
    public_id = models.CharField(max_length=128, unique=True)
    # seconds
    start = models.FloatField(default=-1, db_index=True)
    end = models.FloatField(default=-1, db_index=True)

    # id of the layer in settings.CONFIG['layers']
    layer = models.CharField(max_length=255, db_index=True)
    value = models.TextField()
    # derived in save(): tag-stripped, entity-decoded, NFKD lower-cased text
    findvalue = models.TextField(null=True)
    # derived in save(): sort_string(findvalue) without leading
    # non-letter/non-number characters, cut to 900 characters
    sortvalue = models.CharField(max_length=1000, null=True, blank=True, db_index=True)
    # derived in save(): comma-joined lang="..." attribute values found in value
    languages = models.CharField(max_length=255, null=True, blank=True)

    def editable(self, user):
        """Return True if ``user`` may edit this annotation.

        An authenticated user may edit when they have the
        ``canEditAnnotations`` capability, own the annotation, or share a
        group with the annotation's item.
        """
        if not user.is_authenticated():
            return False
        return bool(
            user.profile.capability('canEditAnnotations') or
            self.user == user or
            user.groups.filter(id__in=self.item.groups.all()).exists()
        )

    @classmethod
    def get(cls, id):
        """Return the annotation with the given ``public_id``."""
        return cls.objects.get(public_id=id)

    def set_public_id(self):
        """Assign the next annotation id handed out by the item."""
        self.public_id = self.item.next_annotationid()

    @classmethod
    def public_layers(cls):
        """Return the ids of all configured layers not marked ``private``."""
        return [
            layer['id'] for layer in settings.CONFIG['layers']
            if not layer.get('private', False)
        ]

    def get_layer(self):
        """Return the config dict of this annotation's layer, or ``{}``."""
        for layer in settings.CONFIG['layers']:
            if layer['id'] == self.layer:
                return layer
        return {}

    def save(self, *args, **kwargs):
        """Derive the search/sort fields, attach the clip and save.

        ``findvalue``, ``sortvalue`` and ``languages`` are recomputed from
        ``value`` (or reset to ``None`` when ``value`` is empty).  Inside one
        transaction the matching clip is looked up or created, a public id
        is assigned if missing, the row is saved, the clip is updated and
        place/event matches are refreshed.
        """
        set_public_id = not self.id or not self.public_id
        layer = self.get_layer()
        if self.value:
            # NOTE(review): raises KeyError when the layer is not configured;
            # kept because cleanup_value's handling of a missing type is unknown.
            self.value = utils.cleanup_value(self.value, layer['type'])

            findvalue = self.value
            if layer['type'] == 'entity':
                try:
                    findvalue = self.get_entity().name
                except Exception:
                    # best effort: if the entity cannot be resolved, keep
                    # searching on the raw value
                    pass
            findvalue = re.sub('<br */?>\n?', ' ', findvalue)
            findvalue = ox.decode_html(ox.strip_tags(findvalue)).replace('\n', ' ')
            self.findvalue = unicodedata.normalize('NFKD', findvalue).lower()

            # sort on the first letter or number, skipping anything before it
            sortvalue = sort_string(self.findvalue)
            while sortvalue and unicodedata.category(sortvalue[0])[0] not in ('L', 'N'):
                sortvalue = sortvalue[1:]
            self.sortvalue = sortvalue[:900] if sortvalue else None

            self.languages = ','.join(re.findall('lang="(.*?)"', self.value)) or None
        else:
            self.findvalue = None
            self.sortvalue = None
            self.languages = None

        with transaction.atomic():
            if not self.clip or self.start != self.clip.start or self.end != self.clip.end:
                self.clip, _created = Clip.get_or_create(self.item, self.start, self.end)

            if set_public_id:
                self.set_public_id()

            super(Annotation, self).save(*args, **kwargs)

            if self.clip:
                # Flip the clip field named after this layer to True.
                # assumes Clip has one boolean field per layer id - TODO confirm
                Clip.objects.filter(**{
                    'id': self.clip.id,
                    self.layer: False
                }).update(**{self.layer: True})
                # update clip.findvalue
                self.clip.save()

            # editAnnotations needs to be in sync
            if layer.get('type') == 'place' or layer.get('hasPlaces'):
                update_matches(self.id, 'place')
            if layer.get('type') == 'event' or layer.get('hasEvents'):
                update_matches(self.id, 'event')

    def delete(self, *args, **kwargs):
        """Delete the annotation, its clip if now unused, and refresh the item."""
        with transaction.atomic():
            super(Annotation, self).delete(*args, **kwargs)
            if self.clip and not self.clip.annotations.exists():
                self.clip.delete()
            self.item.update_find()
            self.item.update_sort()
            self.item.update_facets()

    def cleanup_undefined_relations(self):
        """Delete undefined places/events referenced only by this annotation.

        Only applies to annotations in a ``place`` or ``event`` layer; related
        objects with ``defined=False`` that have no other annotation are
        removed.
        """
        layer_type = self.get_layer().get('type')
        if layer_type == 'place':
            related = self.places
        elif layer_type == 'event':
            related = self.events
        else:
            return
        for obj in related.filter(defined=False):
            if not obj.annotations.exclude(id=self.id).exists():
                obj.delete()

    def get_entity(self):
        """Return the entity that ``value`` refers to (via ``Entity.get``)."""
        # imported here rather than at module level
        # (presumably to avoid a circular import - TODO confirm)
        from entity.models import Entity
        return Entity.get(self.value)

    def _get_entity_json(self, user=None, entity_cache=None):
        """Return ``(entity_json, annotation_value)`` for this annotation's entity.

        When serializing many annotations pointing to the same entity, it is
        expensive to repeatedly look up and serialize the same entity, so the
        result is stored in ``entity_cache`` (a dict keyed by ``value``) when
        one is passed.

        TODO: if Entity were a (nullable) foreign key of Annotation, we could just:

            prefetch_related('entity', 'entity__user', 'entity__documents')

        before serializing the annotations, which would make
        self.entity.json(user=user) cheap and all this unnecessary.
        """
        if entity_cache is not None and self.value in entity_cache:
            return entity_cache[self.value]

        entity = self.get_entity()
        entity_json = entity.json(user=user)
        value = entity.annotation_value()

        if entity_cache is not None:
            entity_cache[self.value] = (entity_json, value)

        return (entity_json, value)

    # keys json() answers itself; other requested keys are looked up on the
    # clip (_clip_keys) or on the item
    annotation_keys = (
        'id', 'in', 'out', 'value', 'created', 'modified',
        'duration', 'layer', 'item', 'videoRatio', 'languages',
        'entity', 'event', 'place'
    )
    _clip_keys = ('hue', 'lightness', 'saturation', 'volume')

    def json(self, layer=False, keys=None, user=None, entity_cache=None):
        """Serialize the annotation to a dict.

        :param layer: include the ``layer`` id in the result.
        :param keys: optional list of keys; when given the result is limited
            to these keys, and keys unknown to the annotation are read from
            the clip or the item.
        :param user: when given, adds ``editable`` for that user and is
            passed on to related objects' ``json()``.
        :param entity_cache: optional dict shared between calls, see
            ``_get_entity_json``.
        :returns: dict; ``id`` is dropped for empty annotations in the
            subtitles layer.
        """
        j = {
            'user': self.user.username,
            'id': self.public_id,
            'in': self.start,
            'out': self.end,
            'value': self.value,
            'created': self.created,
            'modified': self.modified,
        }
        j['duration'] = abs(j['out'] - j['in'])
        if user:
            j['editable'] = self.editable(user)
        if self.languages:
            j['languages'] = self.languages.split(',')

        # .get(): an annotation whose layer is no longer configured has no
        # type and is serialized without entity/event/place data
        layer_type = self.get_layer().get('type')
        if layer_type == 'entity':
            try:
                (j['entity'], j['value']) = self._get_entity_json(
                    user=user, entity_cache=entity_cache)
            except Exception:
                # entity could not be resolved or serialized
                j['entity'] = {}
        elif layer_type in ('event', 'place'):
            # self.events / self.places; fetch at most one row in one query
            related = list(getattr(self, layer_type + 's').all()[:1])
            j[layer_type] = related[0].json(user=user) if related else {}

        if layer or (keys and 'layer' in keys):
            j['layer'] = self.layer
        if keys and 'item' in keys:
            j['item'] = self.item.public_id
        if keys:
            j = dict((key, j[key]) for key in keys if key in j)
            if 'videoRatio' in keys:
                streams = self.item.streams()
                if streams:
                    j['videoRatio'] = streams[0].aspect_ratio
            for key in keys:
                if key in j:
                    continue
                if key in self._clip_keys:
                    j[key] = getattr(self.clip, key)
                elif key not in self.annotation_keys:
                    value = self.item.get(key) or self.item.json.get(key)
                    if not value and hasattr(self.item.sort, key):
                        value = getattr(self.item.sort, key)
                    if value is not None:
                        j[key] = value

        subtitles = get_by_key(settings.CONFIG['layers'], 'isSubtitles', True)
        if subtitles:
            if 'id' in j and self.layer == subtitles['id'] and not self.value:
                del j['id']
        return j

    def __unicode__(self):
        return u"%s %s-%s" % (self.public_id, self.start, self.end)
2011-06-04 16:19:06 +00:00
2012-02-01 15:25:18 +00:00
def cleanup_related(sender, **kwargs):
    """Signal receiver: run ``cleanup_undefined_relations()`` on the instance.

    ``sender`` is ignored; the object is taken from the ``instance`` keyword
    argument (``KeyError`` if it is missing).
    """
    instance = kwargs['instance']
    instance.cleanup_undefined_relations()
# Run cleanup_related before an Annotation is deleted, so the annotation can
# still reach its related places/events when cleaning them up.
pre_delete.connect(cleanup_related, sender=Annotation)
2015-04-16 09:53:05 +00:00
def rename_layer(old, new):
    """Rename layer id ``old`` to ``new`` in the stored data.

    Updates ``Annotation.layer`` and the ``key`` column of the item app's
    ``ItemFind`` and ``Facet`` rows with bulk ``update()`` calls.
    """
    import item.models

    Annotation.objects.filter(layer=old).update(layer=new)
    for keyed_model in (item.models.ItemFind, item.models.Facet):
        keyed_model.objects.filter(key=old).update(key=new)