pandora/pandora/document/models.py

330 lines
11 KiB
Python
Raw Normal View History

2013-03-24 12:28:57 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division, with_statement
import os
import re
2014-02-14 13:35:40 +00:00
from glob import glob
2013-03-24 12:53:32 +00:00
from urllib import quote, unquote
2013-03-24 12:28:57 +00:00
from django.db import models
2013-05-27 11:21:08 +00:00
from django.db.models import Max
2013-03-24 12:28:57 +00:00
from django.contrib.auth.models import User
from django.db.models.signals import pre_delete
import Image
import ox
2013-05-27 11:21:08 +00:00
from item.models import Item
from archive.extract import resize_image
from archive.chunk import save_chunk
2013-05-27 11:21:08 +00:00
2013-03-24 12:28:57 +00:00
import managers
import utils
2013-03-24 12:28:57 +00:00
def get_path(f, x):
    """
    Upload-path callback used by Document.file (``upload_to``).

    Delegates to the ``path`` method of ``f``, passing ``x`` through
    unchanged, and returns whatever that method returns.
    """
    target = f.path(x)
    return target
2013-03-24 12:28:57 +00:00
class Document(models.Model):
    """
    A file uploaded by a user (the code distinguishes PDFs from jpg/png/gif
    images) that can be attached to items through ItemProperties.

    ``pages``, ``width`` and ``height`` use -1 as "not determined yet";
    get_info() fills them in from the file on disk.
    """

    class Meta:
        unique_together = ("user", "name", "extension")

    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    user = models.ForeignKey(User, related_name='files')
    name = models.CharField(max_length=255)
    extension = models.CharField(max_length=255)
    size = models.IntegerField(default=0)
    # cached counter, recomputed by update_matches()
    matches = models.IntegerField(default=0)
    ratio = models.FloatField(default=1)
    pages = models.IntegerField(default=-1)
    width = models.IntegerField(default=-1)
    height = models.IntegerField(default=-1)

    description = models.TextField(default="")
    oshash = models.CharField(max_length=16, unique=True, null=True)
    file = models.FileField(default=None, blank=True, null=True, upload_to=get_path)

    objects = managers.DocumentManager()
    # True while chunks are still being received through save_chunk()
    uploading = models.BooleanField(default=False)

    # derived sort keys, rebuilt on every save()
    name_sort = models.CharField(max_length=255, null=True)
    description_sort = models.CharField(max_length=512, null=True)
    dimensions_sort = models.CharField(max_length=512)

    items = models.ManyToManyField(Item, through='ItemProperties', related_name='documents')

    def save(self, *args, **kwargs):
        """
        Refresh size/info and the *_sort columns, save the row, then
        recompute ``matches``.
        """
        if not self.uploading:
            if self.file:
                self.size = self.file.size
                self.get_info()

        self.name_sort = ox.sort_string(self.name or u'')[:255].lower()
        if self.description:
            self.description_sort = ox.sort_string(self.description)[:512].lower()
        else:
            self.description_sort = None
        # The leading '1'/'0' keeps PDFs (keyed by page count) and other
        # files (keyed by pixel area) in separate groups of the sort order.
        if self.extension == 'pdf':
            self.dimensions_sort = ox.sort_string('1') + ox.sort_string('%d' % self.pages)
        else:
            resolution_sort = self.width * self.height
            self.dimensions_sort = ox.sort_string('0') + ox.sort_string('%d' % resolution_sort)

        super(Document, self).save(*args, **kwargs)
        self.update_matches()

    def __unicode__(self):
        return self.get_id()

    def add(self, item):
        """
        Link this document to ``item``. A newly created link gets the next
        free index for that item; an existing link is left untouched.
        """
        p, created = ItemProperties.objects.get_or_create(item=item, document=self)
        if created:
            # The row just created (index 0) is part of the aggregate, so
            # index__max cannot be None here.
            p.index = ItemProperties.objects.filter(item=item).aggregate(Max('index'))['index__max'] + 1
            p.save()
            p.document.update_matches()
            item.update_sort()

    def remove(self, item):
        """Unlink this document from ``item`` and refresh ``matches``."""
        ItemProperties.objects.filter(item=item, document=self).delete()
        # update_matches() counts self.items, so the cached counter would
        # otherwise stay stale until the next save().
        self.update_matches()

    @classmethod
    def get(cls, id):
        """Return the document whose public id (as built by get_id) is ``id``."""
        return cls.objects.get(pk=ox.fromAZ(id))

    def get_absolute_url(self):
        return ('/documents/%s' % quote(self.get_id())).replace('%3A', ':')

    def get_id(self):
        """Return the public id: the primary key encoded with ox.toAZ."""
        return ox.toAZ(self.id)

    def editable(self, user, item=None):
        """
        Return True if ``user`` may edit this document: the owner, staff,
        users with the 'canEditDocuments' capability, or - when ``item`` is
        given - users who may edit that item. Anonymous users never may.
        """
        if not user or user.is_anonymous():
            return False
        # NOTE(review): `== True` is kept on purpose; capability() may return
        # truthy non-boolean values - confirm before simplifying.
        if self.user == user or \
           user.is_staff or \
           user.profile.capability('canEditDocuments') == True or \
           (item and item.editable(user)):
            return True
        return False

    def edit(self, data, user, item=None):
        """
        Apply ``data`` ('name' and/or 'description') to this document.

        The name loses a trailing " [n]" counter, defaults to "Untitled" and
        gets a fresh " [n]" suffix if another document of the same user and
        extension already uses it. ``data['name']`` is normalised in place.
        With ``item`` given, the description is stored on the item link
        instead of on the document. ``user`` is not used here. The caller
        is expected to save() the document afterwards.
        """
        for key in data:
            if key == 'name':
                data['name'] = re.sub(r' \[\d+\]$', '', data['name']).strip()
                if not data['name']:
                    data['name'] = "Untitled"
                name = data['name']
                num = 1
                others = Document.objects.filter(
                    user=self.user, extension=self.extension
                ).exclude(id=self.id)
                while others.filter(name=name).exists():
                    num += 1
                    name = data['name'] + ' [%d]' % num
                self.name = name
            elif key == 'description' and not item:
                self.description = ox.sanitize_html(data['description'])
        if item:
            p, created = ItemProperties.objects.get_or_create(item=item, document=self)
            if 'description' in data:
                p.description = ox.sanitize_html(data['description'])
                p.save()

    @property
    def dimensions(self):
        """Page count for PDFs, [width, height] for everything else."""
        if self.extension == 'pdf':
            return self.pages
        else:
            return self.resolution

    @property
    def resolution(self):
        return [self.width, self.height]

    def json(self, keys=None, user=None, item=None):
        """
        Return a dict with the requested ``keys`` (a default set if empty).

        Plain attributes are emitted as-is, with falsy values replaced by
        ''. If ``item`` (an Item or its public_id) is given and linked to
        this document, the link's description overrides the document's and
        its 'index' is added.
        """
        if not keys:
            keys = [
                'description',
                'dimensions',
                'editable',
                'entities',
                'extension',
                'id',
                'name',
                'oshash',
                'ratio',
                'size',
                'user',
            ]
        response = {}
        # response key -> attribute name, for keys that differ (none so far)
        _map = {
        }
        for key in keys:
            if key == 'id':
                response[key] = self.get_id()
            elif key == 'editable':
                response[key] = self.editable(user)
            elif key == 'user':
                response[key] = self.user.username
            elif key == 'entities':
                response[key] = [e.json(['id', 'type', 'name'])
                                 for e in self.entities.all().order_by('documentproperties__index')]
            elif hasattr(self, _map.get(key, key)):
                response[key] = getattr(self, _map.get(key, key)) or ''
        if item:
            if isinstance(item, basestring):
                item = Item.objects.get(public_id=item)
            # (item, document) is unique, so one fetch replaces the former
            # exists() + repeated d[0] queries.
            links = list(self.descriptions.filter(item=item)[:1])
            if links:
                link = links[0]
                if 'description' in keys and link.description:
                    response['description'] = link.description
                response['index'] = link.index
        return response

    def path(self, name=''):
        """
        Return the relative storage path for ``name``: the AZ-encoded id,
        zero-padded to 7 characters and split into nested folders.
        """
        h = ox.toAZ(self.id).rjust(7, '0')
        return os.path.join('documents', h[:2], h[2:4], h[4:6], h[6:], name)

    def save_chunk(self, chunk, offset=None, done=False):
        """
        Append an upload chunk to the file while ``uploading`` is set.

        Returns what archive.chunk.save_chunk returns, or (False, 0) if the
        document is not in upload state. On the final chunk (``done``) the
        upload flag is cleared and info, ratio and oshash are computed.
        """
        if self.uploading:
            name = 'data.%s' % self.extension
            name = self.path(name)

            def done_cb():
                if done:
                    self.uploading = False
                    self.get_info()
                    self.get_ratio()
                    self.oshash = ox.oshash(self.file.path)
                    self.save()
                return True, self.file.size

            # Resolves to the module-level save_chunk imported from
            # archive.chunk, not to this method.
            return save_chunk(self, self.file, chunk, offset, name, done_cb)
        return False, 0

    def thumbnail(self, size=None, page=None):
        """
        Return the filesystem path of an image for this document, creating
        derivatives next to the original file as needed.

        For PDFs ``page`` is a page number (out-of-range values fall back to
        page 1). For jpg/png/gif ``page`` is a comma-separated list of four
        integers handed to Image.crop. ``size`` is the target size passed to
        resize_image; without it the unscaled file is returned.
        """
        src = self.file.path
        folder = os.path.dirname(src)
        if size:
            size = int(size)
            path = os.path.join(folder, '%d.jpg' % size)
        else:
            path = src
        if self.extension == 'pdf':
            if page:
                page = int(page)
            if page and page > 1 and page <= self.pages:
                src = os.path.join(folder, '1024p%d.jpg' % page)
            else:
                src = os.path.join(folder, '1024p1.jpg')
                page = 1
            if not os.path.exists(src):
                self.extract_page(page)
            if size:
                path = os.path.join(folder, '%dp%d.jpg' % (size, page))
        elif self.extension in ('jpg', 'png', 'gif'):
            if os.path.exists(src):
                if size and page:
                    # A real list (not a lazy map object) so len() also
                    # works on Python 3.
                    crop = [int(value) for value in page.split(',')]
                    if len(crop) == 4:
                        crop_name = ','.join(map(str, crop))
                        path = os.path.join(folder, '%s.jpg' % crop_name)
                        if not os.path.exists(path):
                            img = Image.open(src).crop(crop)
                            img.save(path)
                        else:
                            img = Image.open(path)
                        src = path
                        if size < max(img.size):
                            path = os.path.join(folder, '%sp%s.jpg' % (size, crop_name))
                            if not os.path.exists(path):
                                resize_image(src, path, size=size)
        if os.path.exists(src) and not os.path.exists(path):
            image_size = max(self.width, self.height)
            if image_size == -1:
                image_size = max(*Image.open(src).size)
            # never scale up: hand out the source when it is already smaller
            if size > image_size:
                path = src
            else:
                resize_image(src, path, size=size)
        return path

    def extract_page(self, page):
        """Render PDF page ``page`` to '1024p<page>.jpg' next to the file."""
        pdf = self.file.path
        image = os.path.join(os.path.dirname(pdf), '1024p%d.jpg' % page)
        utils.extract_pdfpage(pdf, image, page)

    def get_info(self):
        """
        Fill in ``pages`` (PDFs) or ``width``/``height`` (other files) if
        they are still -1. For PDFs the 1024 thumbnail is requested first.
        """
        if self.extension == 'pdf':
            self.thumbnail(1024)
            if self.pages == -1:
                self.width = -1
                self.height = -1
                self.pages = utils.pdfpages(self.file.path)
        elif self.width == -1:
            self.pages = -1
            self.width, self.height = Image.open(self.file.path).size

    def get_ratio(self):
        """
        Compute, store and return width / height. PDFs use their 1024
        thumbnail; unknown or unusable dimensions give a ratio of 1.
        """
        if self.extension == 'pdf':
            image = self.thumbnail(1024)
            try:
                size = Image.open(image).size
            except Exception:
                # unreadable/missing thumbnail: fall back to a square
                size = [1, 1]
        elif self.width > 0 and self.height > 0:
            size = self.resolution
        else:
            # unknown (-1) or zero dimensions would give a bogus ratio or
            # a ZeroDivisionError
            size = [1, 1]
        self.ratio = size[0] / size[1]
        return self.ratio

    def update_matches(self):
        """
        Recount references to this document: linked items and entities plus
        annotations, items and texts whose content contains its URL (quoted
        or unquoted). Writes the counter with a queryset update, so save()
        is not triggered again.
        """
        # imported here rather than at module level
        # NOTE(review): presumably to avoid circular imports - confirm
        import annotation.models
        import item.models
        import text.models
        urls = [self.get_absolute_url()]
        url = unquote(urls[0])
        if url != urls[0]:
            urls.append(url)
        matches = self.items.count() + self.entities.count()
        for url in urls:
            matches += annotation.models.Annotation.objects.filter(value__contains=url).count()
            matches += item.models.Item.objects.filter(data__contains=url).count()
            matches += text.models.Text.objects.filter(text__contains=url).count()
        if matches != self.matches:
            Document.objects.filter(id=self.id).update(matches=matches)
            self.matches = matches
2013-03-24 12:28:57 +00:00
def delete_document(sender, **kwargs):
    """
    Signal handler: remove a document's files from disk.

    Deletes every derivative found in the folder of the document's file
    and then the file itself. ``kwargs['instance']`` is the document being
    deleted; documents without a file are ignored.
    """
    document = kwargs['instance']
    if not document.file:
        return
    original = document.file.path
    folder = os.path.dirname(original)
    for entry in glob('%s/*' % folder):
        if entry == original:
            # removed through the storage backend below
            continue
        if os.path.isdir(entry) and not os.path.islink(entry):
            # os.unlink() cannot remove directories and would abort the
            # whole delete; leave unexpected subfolders alone
            continue
        os.unlink(entry)
    # save=False: the row is about to be deleted, so re-saving it (and
    # running the model's save() hooks) is pointless
    document.file.delete(save=False)
# Run delete_document before a Document row is deleted.
pre_delete.connect(delete_document, sender=Document)
2013-03-24 12:28:57 +00:00
2013-05-27 11:21:08 +00:00
class ItemProperties(models.Model):
    """
    Link between an Item and a Document (the ``through`` model of
    Document.items), carrying a per-item description and position.
    """

    class Meta:
        unique_together = ("item", "document")

    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    item = models.ForeignKey(Item)
    document = models.ForeignKey(Document, related_name='descriptions')
    description = models.TextField(default="")
    index = models.IntegerField(default=0)

    description_sort = models.CharField(max_length=512, null=True)

    def save(self, *args, **kwargs):
        """
        Rebuild ``description_sort`` before saving: from the link's own
        description if it has one, otherwise copied from the document.
        """
        own_text = self.description
        if not own_text:
            sort_key = self.document.description_sort
        else:
            sort_key = ox.sort_string(own_text)[:512].lower()
        self.description_sort = sort_key
        super(ItemProperties, self).save(*args, **kwargs)