new documents section

This commit is contained in:
j 2016-10-05 00:00:03 +02:00
commit e1f35b1ec8
74 changed files with 6737 additions and 631 deletions

View file

View file

@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
from django.core.management.base import BaseCommand
from django.db import connection, transaction
from django.db.models import fields
from django.conf import settings
settings.RELOAD_CONFIG = False
import app.monkey_patch
from ... import models
class Command(BaseCommand):
    """Management command that refreshes file-derived info on all documents.

    Every Document is loaded one at a time; for documents with a file
    attached ``get_info()`` and ``get_ratio()`` are re-run and the document
    is saved again.
    """
    help = 'update document find and sort values'
    args = ''

    def handle(self, **options):
        """Process every document, reporting per-document failures.

        The run is best effort: a failure on one document is written to
        stderr and the loop continues with the next one.
        """
        # Collect the ids up front so each document is fetched fresh.
        document_ids = [row['id'] for row in models.Document.objects.all().values('id')]
        for document_id in document_ids:
            try:
                document = models.Document.objects.get(id=document_id)
                # NOTE(review): indentation was lost in the reviewed view;
                # save() is assumed to belong to the ``if document.file``
                # branch - confirm against the repository.
                if document.file:
                    document.get_info()
                    document.get_ratio()
                    document.save()
            except models.Document.DoesNotExist:
                # Deleted after the id list was built; nothing to update.
                continue
            except Exception as e:
                # Keep going, but never hide the failure. Catching Exception
                # (not a bare except) lets KeyboardInterrupt/SystemExit
                # stop the command.
                self.stderr.write('failed to update document %s: %s' % (document_id, e))

View file

@ -1,6 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import unicodedata
from six import string_types
from django.db.models import Q, Manager
from django.conf import settings
import ox
from oxdjango.query import QuerySet
@ -8,14 +12,31 @@ from oxdjango.query import QuerySet
import entity.managers
from oxdjango.managers import get_operator
from documentcollection.models import Collection
from item import utils
keymap = {
'user': 'user__username',
'item': 'items__public_id',
}
default_key = 'name'
default_key = 'title'
def parseCondition(condition, user, item=None):
def get_key_type(k):
    """Return the normalised type of document key *k*.

    The type is read from the matching entry in
    ``settings.CONFIG['documentKeys']``; unknown keys count as ``'string'``.
    When the configured type is a list its first element is used. The
    types title, person, text, year, length and layer are all reported as
    ``'string'``; any other type is returned unchanged.
    """
    config = utils.get_by_id(settings.CONFIG['documentKeys'], k)
    if not config:
        config = {'type': 'string'}
    declared = config.get('type')
    if isinstance(declared, list):
        declared = declared[0]
    string_like = dict.fromkeys(
        ('title', 'person', 'text', 'year', 'length', 'layer'), 'string')
    return string_like.get(declared, declared)
def parseCondition(condition, user, item=None, owner=None):
'''
'''
k = condition.get('key', default_key)
@ -33,17 +54,47 @@ def parseCondition(condition, user, item=None):
op = '='
if op.startswith('!'):
return ~buildCondition(k, op[1:], v)
return buildCondition(k, op[1:], v, user, True, owner=owner)
else:
return buildCondition(k, op, v)
return buildCondition(k, op, v, user, owner=owner)
def buildCondition(k, op, v):
def buildCondition(k, op, v, user, exclude=False, owner=None):
import entity.models
from . import models
# fixme: frontend should never call with list
if k == 'list':
print('fixme: frontend should never call with list', k, op, v)
k = 'collection'
key_type = get_key_type(k)
facet_keys = models.Document.facet_keys
if k == 'id':
v = ox.fromAZ(v)
return Q(**{k: v})
if isinstance(v, bool):
q = Q(**{k: v})
if exclude:
q = ~Q(id__in=models.Document.objects.filter(q))
return q
elif k == 'groups':
if op == '==' and v == '$my':
if not owner:
owner = user
groups = owner.groups.all()
else:
key = 'name' + get_operator(op)
groups = Group.objects.filter(**{key: v})
if not groups.count():
return Q(id=0)
q = Q(groups__in=groups)
if exclude:
q = ~q
return q
elif k in ('oshash', 'items__public_id'):
q = Q(**{k: v})
if exclude:
q = ~Q(id__in=models.Document.objects.filter(q))
return q
elif isinstance(v, bool):
key = k
elif k == 'entity':
entity_key, entity_v = entity.managers.namePredicate(op, v)
@ -51,13 +102,87 @@ def buildCondition(k, op, v):
v = entity.models.DocumentProperties.objects.filter(**{
'entity__' + entity_key: entity_v
}).values_list('document_id', flat=True)
else:
key = k + get_operator(op, 'istr')
elif k == 'collection':
q = Q(id=0)
l = v.split(":", 1)
if len(l) >= 2:
lqs = list(Collection.objects.filter(name=l[1], user__username=l[0]))
if len(lqs) == 1 and lqs[0].accessible(user):
l = lqs[0]
if l.query.get('static', False) is False:
data = l.query
q = parseConditions(data.get('conditions', []),
data.get('operator', '&'),
user, owner=l.user)
else:
q = Q(id__in=l.documents.all())
else:
q = Q(id=0)
return q
elif key_type == 'boolean':
q = Q(**{'find__key': k, 'find__value': v})
if exclude:
q = ~Q(id__in=models.Document.objects.filter(q))
return q
elif key_type == "string":
in_find = True
if in_find:
value_key = 'find__value'
else:
value_key = k
if isinstance(v, string_types):
v = unicodedata.normalize('NFKD', v).lower()
if k in facet_keys:
in_find = False
facet_value = 'facets__value' + get_operator(op, 'istr')
v = models.Document.objects.filter(**{'facets__key': k, facet_value: v})
value_key = 'id__in'
else:
value_key = value_key + get_operator(op)
k = str(k)
value_key = str(value_key)
if k == '*':
q = Q(**{value_key: v})
elif in_find:
q = Q(**{'find__key': k, value_key: v})
else:
q = Q(**{value_key: v})
if exclude:
q = ~Q(id__in=models.Document.objects.filter(q))
return q
elif key_type == 'date':
def parse_date(d):
while len(d) < 3:
d.append(1)
return datetime(*[int(i) for i in d])
#using sort here since find only contains strings
v = parse_date(v.split('-'))
vk = 'sort__%s%s' % (k, get_operator(op, 'int'))
vk = str(vk)
q = Q(**{vk: v})
if exclude:
q = ~q
return q
else: # integer, float, list, time
#use sort table here
if key_type == 'time':
v = int(utils.parse_time(v))
vk = 'sort__%s%s' % (k, get_operator(op, 'int'))
vk = str(vk)
q = Q(**{vk: v})
if exclude:
q = ~q
return q
key = str(key)
return Q(**{key: v})
q = Q(**{key: v})
if exclude:
q = ~q
return q
def parseConditions(conditions, operator, user, item=None):
def parseConditions(conditions, operator, user, item=None, owner=None):
'''
conditions: [
{
@ -80,12 +205,12 @@ def parseConditions(conditions, operator, user, item=None):
for condition in conditions:
if 'conditions' in condition:
q = parseConditions(condition['conditions'],
condition.get('operator', '&'), user, item)
condition.get('operator', '&'), user, item, owner=owner)
if q:
conn.append(q)
pass
else:
conn.append(parseCondition(condition, user, item))
conn.append(parseCondition(condition, user, item, owner=owner))
if conn:
q = conn[0]
for c in conn[1:]:
@ -133,4 +258,21 @@ class DocumentManager(Manager):
if conditions:
qs = qs.filter(conditions)
#anonymous can only see public items
if not user or user.is_anonymous():
level = 'guest'
allowed_level = settings.CONFIG['capabilities']['canSeeDocument'][level]
qs = qs.filter(rightslevel__lte=allowed_level)
rendered_q = Q(rendered=True)
#users can see public items, their own items and items of their groups
else:
level = user.profile.get_level()
allowed_level = settings.CONFIG['capabilities']['canSeeDocument'][level]
q = Q(rightslevel__lte=allowed_level) | Q(user=user)
rendered_q = Q(rendered=True) | Q(user=user)
if user.groups.count():
q |= Q(groups__in=user.groups.all())
rendered_q |= Q(groups__in=user.groups.all())
qs = qs.filter(q)
return qs

View file

@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-04 16:31
from __future__ import unicode_literals
from django.db import migrations, models
import oxdjango.fields
# Schema migration for the ``document`` app: adds two fields to the
# Document model.
class Migration(migrations.Migration):
# Applies on top of the previous ``document`` migration.
dependencies = [
('document', '0002_auto_20160219_1537'),
]
operations = [
# Dictionary field ``data``.
# NOTE(review): the default is a literal ``{}`` instance - confirm that
# oxdjango's DictField does not share it between model instances.
migrations.AddField(
model_name='document',
name='data',
field=oxdjango.fields.DictField(default={}),
),
# Indexed integer field ``rightslevel``, defaulting to 0.
migrations.AddField(
model_name='document',
name='rightslevel',
field=models.IntegerField(db_index=True, default=0),
),
]

View file

@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import ox
from django.db import migrations, models
from django.db.models import Max
from PIL import Image
import oxdjango.fields
def migrate_texts(apps, schema_editor):
    """Data migration: turn every ``text.Text`` into a ``document.Document``.

    For each Text a Document is created with the same user, description,
    text and embeds (an empty name becomes ``'Index'``). PDF texts get
    their file copied into the document's storage path, and page count and
    ratio are computed from it. Each new document is added to the owner's
    'Texts' collection, to the 'Featured Texts' collection of the first
    user when the text was featured, and to 'Favorite Texts' of every
    subscribed user. ``created``/``modified`` are copied from the Text.

    apps: historical app registry passed in by ``RunPython``.
    schema_editor: unused.
    """
    import os
    import shutil

    import ox
    from document import utils

    Text = apps.get_model("text", "Text")
    Document = apps.get_model("document", "Document")
    Collection = apps.get_model("documentcollection", "Collection")
    CollectionDocument = apps.get_model("documentcollection", "CollectionDocument")
    User = apps.get_model("auth", "User")

    def add(collection, doc):
        """Append *doc* to *collection* (next free index) unless present."""
        if collection.documents.filter(id=doc.id).count() != 0:
            return
        entry = CollectionDocument()
        entry.collection = collection
        entry.document = doc
        last_index = CollectionDocument.objects.filter(
            collection=collection).aggregate(Max('index'))['index__max']
        entry.index = 0 if last_index is None else last_index + 1
        entry.save()

    def path(doc, name=''):
        """Return the storage path for *name* below the document's id."""
        h = ox.toAZ(doc.id)
        h = (7 - len(h)) * '0' + h
        return os.path.join('documents', h[:2], h[2:4], h[4:6], h[6:], name)

    def update_info(doc):
        """Extract page 1 as an image and set ``pages`` and ``ratio``."""
        pdf = doc.file.path
        page = 1
        image = os.path.join(os.path.dirname(pdf), '1024p%d.jpg' % page)
        utils.extract_pdfpage(pdf, image, page)
        doc.pages = utils.pdfpages(pdf)
        if os.path.exists(image):
            width, height = Image.open(image).size
            # This module has no ``from __future__ import division``, so a
            # plain ``/`` would truncate to an integer on Python 2.
            doc.ratio = float(width) / height

    featured = None
    if Text.objects.filter(status='featured').count():
        first_user = User.objects.all()[0]
        featured, created = Collection.objects.get_or_create(user=first_user, name='Featured Texts')
        if created:
            featured.status = 'featured'
            featured.save()

    for t in Text.objects.all():
        d = Document()
        d.extension = t.type
        if t.name == '':
            d.name = 'Index'
        else:
            d.name = t.name
        d.user = t.user
        d.description = t.description
        # Assign a fresh dict instead of writing into the field's literal
        # ``{}`` default, which may be shared between instances.
        d.data = {'text': t.text, 'embeds': t.embeds}
        d.save()
        if t.type == 'pdf':
            d.file.name = path(d, 'data.pdf')
            target_dir = os.path.dirname(d.file.path)
            # Tolerate a directory left behind by an earlier, aborted run.
            if not os.path.isdir(target_dir):
                os.makedirs(target_dir)
            shutil.copy2(t.file.path, d.file.path)
            d.oshash = ox.oshash(d.file.path)
            update_info(d)
            d.save()
        # update() bypasses auto_now/auto_now_add so the original
        # timestamps survive.
        Document.objects.filter(id=d.id).update(created=t.created, modified=t.modified)
        c, created = Collection.objects.get_or_create(user=t.user, name='Texts')
        add(c, d)
        if t.status == 'featured' and featured is not None:
            add(featured, d)
        for user in t.subscribed_users.all():
            favorite, created = Collection.objects.get_or_create(user=user, name='Favorite Texts')
            add(favorite, d)
# Data migration wrapper: runs migrate_texts once its dependencies exist.
class Migration(migrations.Migration):
# Needs the auth and text apps, the new Document fields (0003) and the
# initial documentcollection schema.
dependencies = [
('auth', '__first__'),
('text', '__first__'),
('document', '0003_new_fields'),
('documentcollection', '0001_initial'),
]
operations = [
# Only a forward function is passed to RunPython.
migrations.RunPython(migrate_texts),
]

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-08 12:32
from __future__ import unicode_literals
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
# Schema migration: creates the Access model, which links a document to a
# (nullable) user with a timestamp and a counter.
class Migration(migrations.Migration):
dependencies = [
# The user model is swappable, so depend on whatever AUTH_USER_MODEL is.
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('document', '0004_migrate_text'),
]
operations = [
migrations.CreateModel(
name='Access',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
# auto_now: refreshed on every save of the row.
('access', models.DateTimeField(auto_now=True)),
('accessed', models.IntegerField(default=0)),
('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='accessed', to='document.Document')),
# user may be NULL.
('user', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='accessed_documents', to=settings.AUTH_USER_MODEL)),
],
),
# One Access row per (document, user) pair.
migrations.AlterUniqueTogether(
name='access',
unique_together=set([('document', 'user')]),
),
]

View file

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-26 12:59
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
# Schema migration: creates the Find (key/value text per document) and
# Sort (one row of sort columns per document) models.
class Migration(migrations.Migration):
dependencies = [
('document', '0005_auto_20161008_1232'),
]
operations = [
# Find: the document foreign key is added in a separate AddField below.
migrations.CreateModel(
name='Find',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('key', models.CharField(db_index=True, max_length=200)),
('value', models.TextField(blank=True, db_index=True)),
],
),
# Sort: keyed by the document itself (one-to-one primary key); every
# sort column is nullable and indexed.
migrations.CreateModel(
name='Sort',
fields=[
('document', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='sort', serialize=False, to='document.Document')),
('created', models.DateTimeField(blank=True, db_index=True, null=True)),
('name', models.CharField(db_index=True, max_length=1000, null=True)),
('id', models.CharField(db_index=True, max_length=1000, null=True)),
('extension', models.CharField(db_index=True, max_length=1000, null=True)),
('dimensions', models.BigIntegerField(blank=True, db_index=True, null=True)),
('size', models.BigIntegerField(blank=True, db_index=True, null=True)),
('description', models.CharField(db_index=True, max_length=1000, null=True)),
('matches', models.BigIntegerField(blank=True, db_index=True, null=True)),
('user', models.CharField(db_index=True, max_length=1000, null=True)),
('modified', models.DateTimeField(blank=True, db_index=True, null=True)),
('accessed', models.DateTimeField(blank=True, db_index=True, null=True)),
('timesaccessed', models.BigIntegerField(blank=True, db_index=True, null=True)),
('rightslevel', models.BigIntegerField(blank=True, db_index=True, null=True)),
],
),
migrations.AddField(
model_name='find',
name='document',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='find', to='document.Document'),
),
# At most one Find row per (document, key).
migrations.AlterUniqueTogether(
name='find',
unique_together=set([('document', 'key')]),
),
]

View file

@ -0,0 +1,55 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-26 15:59
from __future__ import unicode_literals
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
def migrate_data(apps, schema_editor):
    """Copy the ``name`` and ``description`` columns into ``Document.data``.

    For every document, ``data['title']`` is filled from ``name`` and
    ``data['description']`` from ``description`` unless the key is already
    present. Every document is saved afterwards.

    apps: historical app registry passed in by ``RunPython``.
    schema_editor: unused.
    """
    Document = apps.get_model('document', 'Document')
    column_for_key = (('title', 'name'), ('description', 'description'))
    for doc in Document.objects.all():
        for data_key, column in column_for_key:
            if data_key in doc.data:
                continue
            doc.data[data_key] = getattr(doc, column)
        doc.save()
# Mixed data/schema migration: copies name/description into Document.data,
# then drops the old columns.
class Migration(migrations.Migration):
dependencies = [
('document', '0006_auto_20161026_1259'),
]
operations = [
# Must come first: migrate_data reads ``name`` and ``description``,
# which are removed further down. No reverse function is supplied.
migrations.RunPython(migrate_data),
# The reverse accessor on the user becomes ``documents``.
migrations.AlterField(
model_name='document',
name='user',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='documents', to=settings.AUTH_USER_MODEL),
),
# Clears the unique_together constraint on Document.
migrations.AlterUniqueTogether(
name='document',
unique_together=set([]),
),
migrations.RemoveField(
model_name='document',
name='description_sort',
),
migrations.RemoveField(
model_name='document',
name='dimensions_sort',
),
migrations.RemoveField(
model_name='document',
name='name_sort',
),
migrations.RemoveField(
model_name='document',
name='name',
),
migrations.RemoveField(
model_name='document',
name='description',
),
]

View file

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-26 16:25
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
# Schema migration: creates the Facet model (key/value/sortvalue rows
# attached to a document).
class Migration(migrations.Migration):
dependencies = [
('document', '0007_auto_20161026_1559'),
]
operations = [
migrations.CreateModel(
name='Facet',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('key', models.CharField(db_index=True, max_length=200)),
('value', models.CharField(db_index=True, max_length=1000)),
('sortvalue', models.CharField(db_index=True, max_length=1000)),
('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='facets', to='document.Document')),
],
),
# A value may appear only once per (document, key).
migrations.AlterUniqueTogether(
name='facet',
unique_together=set([('document', 'key', 'value')]),
),
]

View file

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.9.4 on 2016-10-27 12:27
from __future__ import unicode_literals
from django.db import migrations, models
# Schema migration: adds an optional many-to-many ``groups`` relation from
# Document to auth.Group.
class Migration(migrations.Migration):
dependencies = [
('auth', '__first__'),
('document', '0008_auto_20161026_1625'),
]
operations = [
# blank=True: a document does not need to belong to any group.
migrations.AddField(
model_name='document',
name='groups',
field=models.ManyToManyField(blank=True, related_name='documents', to='auth.Group'),
),
]

View file

@ -5,17 +5,23 @@ from __future__ import division, print_function, absolute_import
import os
import re
from glob import glob
import unicodedata
from six import string_types
from six.moves.urllib.parse import quote, unquote
from django.db import models
from django.db.models import Max
from django.contrib.auth.models import User
from django.db import models, transaction
from django.db.models import Q, Sum, Max
from django.contrib.auth.models import User, Group
from django.db.models.signals import pre_delete
from django.conf import settings
from PIL import Image
import ox
from oxdjango import fields
from oxdjango.sortmodel import get_sort_field
from person.models import get_name_sort
from item.models import Item
from archive.extract import resize_image
from archive.chunk import save_chunk
@ -23,57 +29,249 @@ from archive.chunk import save_chunk
from . import managers
from . import utils
def get_path(f, x): return f.path(x)
def get_path(f, x):
return f.path(x)
class Document(models.Model):
class Meta:
unique_together = ("user", "name", "extension")
created = models.DateTimeField(auto_now_add=True)
modified = models.DateTimeField(auto_now=True)
user = models.ForeignKey(User, related_name='files')
name = models.CharField(max_length=255)
user = models.ForeignKey(User, related_name='documents')
groups = models.ManyToManyField(Group, blank=True, related_name='documents')
extension = models.CharField(max_length=255)
size = models.IntegerField(default=0)
matches = models.IntegerField(default=0)
ratio = models.FloatField(default=1)
ratio = models.FloatField(default=640/1024)
pages = models.IntegerField(default=-1)
width = models.IntegerField(default=-1)
height = models.IntegerField(default=-1)
description = models.TextField(default="")
oshash = models.CharField(max_length=16, unique=True, null=True)
file = models.FileField(default=None, blank=True,null=True, upload_to=get_path)
file = models.FileField(default=None, blank=True, null=True, upload_to=get_path)
objects = managers.DocumentManager()
uploading = models.BooleanField(default = False)
name_sort = models.CharField(max_length=255, null=True)
description_sort = models.CharField(max_length=512, null=True)
dimensions_sort = models.CharField(max_length=512)
uploading = models.BooleanField(default=False)
items = models.ManyToManyField(Item, through='ItemProperties', related_name='documents')
rightslevel = models.IntegerField(db_index=True, default=0)
data = fields.DictField(default={})
def update_access(self, user):
    """Record an access of this document by *user*.

    Users that are not authenticated are recorded under ``user=None``.
    An existing Access row is saved again; a row that was just created by
    ``get_or_create`` is left as is.
    """
    viewer = user if user.is_authenticated() else None
    access, created = Access.objects.get_or_create(document=self, user=viewer)
    if created:
        return
    access.save()
# Recompute the facet values of this document for ``key`` and hand them to
# update_facet_values().
def update_facet(self, key):
current_values = self.get_value(key, [])
# 'name' is special: its values are collected from every configured
# document key whose sortType is 'person'.
if key == 'name':
current_values = []
for k in settings.CONFIG['documentKeys']:
if k.get('sortType') == 'person':
# NOTE(review): no ``get`` method is visible on Document in this
# change (only ``get_value``) - confirm this call resolves.
current_values += self.get(k['id'], [])
# Normalise a scalar value into a list of one string (or an empty list).
if not isinstance(current_values, list):
if not current_values:
current_values = []
else:
# NOTE(review): ``unicode`` is a Python 2 builtin - confirm the
# intended interpreter.
current_values = [unicode(current_values)]
# NOTE(review): assumes ``key`` has an entry in documentKeys; if
# get_by_id returns nothing, ``.get`` fails here - confirm.
filter_map = utils.get_by_id(settings.CONFIG['documentKeys'], key).get('filterMap')
if filter_map:
# filterMap is a regular expression; keep only the first match of
# each value and drop values that do not match.
filter_map = re.compile(filter_map)
_current_values = []
for value in current_values:
value = filter_map.findall(value)
if value:
_current_values.append(value[0])
current_values = _current_values
# De-duplicate, strip markup, decode HTML entities and apply NFKD.
current_values = list(set(current_values))
current_values = [ox.decode_html(ox.strip_tags(v)) for v in current_values]
current_values = [unicodedata.normalize('NFKD', v) for v in current_values]
self.update_facet_values(key, current_values)
def update_facet_values(self, key, current_values):
    """Synchronise the stored Facet rows of this document for *key*.

    Rows whose value (compared case-insensitively) is no longer in
    *current_values* are deleted; values not stored yet are created with a
    sort value derived from the value (name-sorted for person keys and
    'name').

    key: facet key to update.
    current_values: list of strings that should be stored for *key*.
    """
    current_sortvalues = set(value.lower() for value in current_values)
    saved_values = [i.value.lower() for i in Facet.objects.filter(document=self, key=key)]
    # Build a real list: on Python 3 ``filter()`` returns an iterator that
    # is always truthy, so the delete below would run with an empty Q()
    # and remove every facet row of this document for ``key``.
    removed_values = [v for v in saved_values if v not in current_sortvalues]
    if removed_values:
        q = Q()
        for v in removed_values:
            q |= Q(value__iexact=v)
        Facet.objects.filter(document=self, key=key).filter(q).delete()
    for value in current_values:
        if value.lower() not in saved_values:
            sortvalue = value
            if key in self.person_keys + ['name']:
                sortvalue = get_name_sort(value)
            sortvalue = utils.sort_string(sortvalue).lower()[:900]
            f, created = Facet.objects.get_or_create(document=self, key=key, value=value, sortvalue=sortvalue)
            # NOTE(review): indentation was lost in the reviewed view; both
            # clean-up statements are assumed to run only for newly created
            # rows - confirm against the repository.
            if created:
                # Drop this document's rows that differ only in case ...
                Facet.objects.filter(document=self, key=key, value__iexact=value).exclude(value=value).delete()
                # ... and align the spelling on all other documents.
                Facet.objects.filter(key=key, value__iexact=value).exclude(value=value).update(value=value)
            saved_values.append(value.lower())
def update_facets(self):
    """Refresh the facets for every facet key plus 'title'."""
    wanted_keys = set(self.facet_keys + ['title'])
    for facet_key in wanted_keys:
        self.update_facet(facet_key)
def update_find(self):
    """Rebuild the Find rows of this document inside one transaction.

    For every configured document key except '*', 'dimensions' and the
    facet keys, the value from ``self.json()`` is stored (lists joined
    with newlines). 'rightslevel' is taken from the model field. Empty
    values remove the corresponding Find row.
    """
    def store(key, value):
        # Empty string / None: there is nothing to find, drop the row.
        if value in ('', None):
            Find.objects.filter(document=self, key=key).delete()
            return
        entry, created = Find.objects.get_or_create(document=self, key=key)
        if isinstance(value, bool):
            value = 'true' if value else 'false'
        if isinstance(value, string_types):
            value = ox.decode_html(ox.strip_tags(value.strip()))
            value = unicodedata.normalize('NFKD', value).lower()
        entry.value = value
        entry.save()

    with transaction.atomic():
        data = self.json()
        for config in settings.CONFIG['documentKeys']:
            key_id = config['id']
            if key_id == 'rightslevel':
                store(key_id, self.rightslevel)
                continue
            if key_id in ('*', 'dimensions') or key_id in self.facet_keys:
                continue
            value = data.get(key_id)
            if isinstance(value, list):
                value = u'\n'.join(value)
            store(key_id, value)
base_keys = ('id', 'size', 'dimensions', 'extension', 'matches')
# Fill (creating it if needed) the Sort row of this document and save it.
def update_sort(self):
# Reuse the existing one-to-one Sort row, or start a new one.
try:
s = self.sort
except Sort.DoesNotExist:
s = Sort(document=self)
# Base keys are copied straight from the document.
s.id = self.id
s.extension = self.extension
s.size = self.size
s.matches = self.matches
# dimensions: a leading '2' (pdf), '1' (html) or '0' (everything else)
# followed by the page count, self.dimensions or width * height.
if self.extension == 'pdf':
s.dimensions = ox.sort_string('2') + ox.sort_string('%d' % self.pages)
else:
if self.extension == 'html':
resolution_sort = self.dimensions
s.dimensions = ox.sort_string('1') + ox.sort_string('%d' % resolution_sort)
else:
resolution_sort = self.width * self.height
s.dimensions = ox.sort_string('0') + ox.sort_string('%d' % resolution_sort)
# Join the name-sort form of each value with '; '.
def sortNames(values):
sort_value = u''
if values:
sort_value = u'; '.join([get_name_sort(name) for name in values])
if not sort_value:
sort_value = u''
return sort_value
# Store value on the sort row; strings are lower-cased and HTML-decoded,
# falsy values are stored as None.
def set_value(s, name, value):
if isinstance(value, string_types):
value = ox.decode_html(value.lower())
if not value:
value = None
setattr(s, name, value)
# Value for a key: annotation values of the configured layer, or the
# document value for ``source``.
def get_value(source, key):
if 'value' in key and 'layer' in key['value']:
# NOTE(review): ``self.annotations`` is not defined in the visible part
# of Document - confirm the relation exists.
value = [a.value for a in self.annotations.filter(layer=key['value']['layer']).exclude(value='')]
else:
value = self.get_value(source)
return value
# Count of space-separated pieces in the value (0 when empty).
def get_words(source, key):
value = get_value(source, key)
if isinstance(value, list):
value = '\n'.join(value)
value = len(value.split(' ')) if value else 0
return value
# Every configured key flagged ``sort`` that is not a base key.
for key in filter(lambda k: k.get('sort', False), settings.CONFIG['documentKeys']):
name = key['id']
if name not in self.base_keys:
source = name
sort_type = key.get('sortType', key['type'])
# A ``value`` entry may redirect to another source key and type.
if 'value' in key:
if 'key' in key['value']:
source = key['value']['key']
sort_type = key['value'].get('type', sort_type)
if isinstance(sort_type, list):
sort_type = sort_type[0]
if sort_type == 'title':
value = self.get_value(source, u'Untitled')
value = utils.sort_title(value)[:955]
set_value(s, name, value)
elif sort_type == 'person':
value = sortNames(self.get_value(source, []))
value = utils.sort_string(value)[:955]
set_value(s, name, value)
elif sort_type == 'string':
value = self.get_value(source, u'')
if isinstance(value, list):
value = u','.join(value)
value = utils.sort_string(value)[:955]
set_value(s, name, value)
elif sort_type == 'words':
# NOTE(review): ``s.duration`` is never assigned in this method -
# confirm the Sort model provides it for the configured keys.
value = get_words(source, key) if s.duration else None
set_value(s, name, value)
elif sort_type == 'wordsperminute':
value = get_words(source, key)
value = value / (s.duration / 60) if value and s.duration else None
set_value(s, name, value)
elif sort_type in ('length', 'integer', 'time', 'float'):
# can be length of strings or length of arrays, i.e. keywords
if 'layer' in key.get('value', []):
value = self.annotations.filter(layer=key['value']['layer']).count()
else:
value = self.get_value(source)
if isinstance(value, list):
value = len(value)
set_value(s, name, value)
elif sort_type == 'year':
value = self.get_value(source)
set_value(s, name, value)
elif sort_type == 'date':
value = self.get_value(source)
if isinstance(value, string_types):
# NOTE(review): ``datetime_safe`` is not among the imports visible in
# this change - confirm it is in scope.
value = datetime_safe.datetime.strptime(value, '%Y-%m-%d')
set_value(s, name, value)
s.save()
def save(self, *args, **kwargs):
if not self.uploading:
if self.file:
self.size = self.file.size
self.get_info()
self.name_sort = ox.sort_string(self.name or u'')[:255].lower()
if self.description:
self.description_sort = ox.sort_string(self.description)[:512].lower()
if self.extension == 'html':
self.size = len(self.data.get('text', ''))
if self.id:
self.update_sort()
self.update_find()
self.update_facets()
new = False
else:
self.description_sort = None
if self.extension == 'pdf':
self.dimensions_sort = ox.sort_string('1') + ox.sort_string('%d' % self.pages)
else:
resolution_sort = self.width * self.height
self.dimensions_sort = ox.sort_string('0') + ox.sort_string('%d' % resolution_sort)
new = True
super(Document, self).save(*args, **kwargs)
if new:
self.update_sort()
self.update_find()
self.update_facets()
self.update_matches()
def __unicode__(self):
@ -100,40 +298,61 @@ class Document(models.Model):
def get_id(self):
return ox.toAZ(self.id)
def accessible(self, user):
    """Return whether *user* owns this document or its status is public/featured."""
    is_owner = self.user == user
    return is_owner or self.status in ('public', 'featured')
def editable(self, user, item=None):
if not user or user.is_anonymous():
return False
if self.user == user or \
user.is_staff or \
user.profile.capability('canEditDocuments') == True or \
user.profile.capability('canEditDocuments') is True or \
(item and item.editable(user)):
return True
return False
def edit(self, data, user, item=None):
for key in data:
if key == 'name':
data['name'] = re.sub(' \[\d+\]$', '', data['name']).strip()
if not data['name']:
data['name'] = "Untitled"
name = data['name']
num = 1
while Document.objects.filter(name=name, user=self.user, extension=self.extension).exclude(id=self.id).count()>0:
num += 1
name = data['name'] + ' [%d]' % num
self.name = name
elif key == 'description' and not item:
self.description = ox.sanitize_html(data['description'])
if item:
p, created = ItemProperties.objects.get_or_create(item=item, document=self)
if 'description' in data:
p.description = ox.sanitize_html(data['description'])
p.save()
else:
for key in data:
k = list(filter(lambda i: i['id'] == key, settings.CONFIG['documentKeys']))
ktype = k and k[0].get('type') or ''
if key == 'text' and self.extension == 'html':
self.data['text'] = ox.sanitize_html(data['text'], global_attributes=[
'data-name',
'data-type',
'data-value',
'lang'
])
elif ktype == 'text':
self.data[key] = ox.sanitize_html(data[key])
elif ktype == '[text]':
self.data[key] = [ox.sanitize_html(t) for t in data[key]]
elif ktype == '[string]':
self.data[key] = [ox.escape_html(t) for t in data[key]]
elif isinstance(data[key], string_types):
self.data[key] = ox.escape_html(data[key])
elif isinstance(data[key], list):
def cleanup(i):
if isinstance(i, string_types):
i = ox.escape_html(i)
return i
self.data[key] = [cleanup(i) for i in data[key]]
elif isinstance(data[key], int) or isinstance(data[key], float):
self.data[key] = data[key]
else:
self.data[key] = ox.escape_html(data[key])
@property
def dimensions(self):
    """Dimensions by extension: pages (pdf), pieces of text split on ' ' (html), else resolution."""
    kind = self.extension
    if kind == 'pdf':
        return self.pages
    if kind == 'html':
        text = self.data.get('text', '')
        return len(text.split(' '))
    return self.resolution
@ -141,21 +360,43 @@ class Document(models.Model):
def resolution(self):
return [self.width, self.height]
def get_value(self, key, default=None):
    """Return the value of *key* for this document.

    'user' yields the owner's username; extension, id, matches, ratio and
    size are read from the model attributes; everything else is looked up
    in ``self.data`` and falls back to *default*.
    """
    if key == 'user':
        return self.user.username
    model_attributes = ('extension', 'id', 'matches', 'ratio', 'size')
    if key in model_attributes:
        return getattr(self, key)
    return self.data.get(key, default)
def json(self, keys=None, user=None, item=None):
if not keys:
keys=[
keys = [
'description',
'dimensions',
'editable',
'entities',
'extension',
'id',
'name',
'oshash',
'title',
'ratio',
'matches',
'size',
'user',
]
if self.extension in ('html', 'txt'):
keys.append('text')
for key in settings.CONFIG['documentKeys']:
if key['id'] in ('*', ):
continue
if key['id'] not in keys:
keys.append(key['id'])
response = {}
_map = {
}
@ -166,6 +407,10 @@ class Document(models.Model):
response[key] = self.editable(user)
elif key == 'user':
response[key] = self.user.username
elif key == 'accessed':
response[key] = self.accessed.aggregate(Max('access'))['access__max']
elif key == 'timesaccessed':
response[key] = self.accessed.aggregate(Sum('accessed'))['accessed__sum']
elif key == 'entities':
dps = self.documentproperties.select_related('entity').order_by('index')
response[key] = entity_jsons = []
@ -175,8 +420,12 @@ class Document(models.Model):
entity_jsons.append(entity_json)
elif key == 'items':
response[key] = [i['public_id'] for i in self.items.all().values('public_id')]
elif key in self.data:
response[key] = self.data[key]
elif hasattr(self, _map.get(key, key)):
response[key] = getattr(self, _map.get(key,key)) or ''
response[key] = getattr(self, _map.get(key, key)) or ''
if self.extension == 'html':
response['text'] = self.data.get('text', '')
if item:
if isinstance(item, string_types):
item = Item.objects.get(public_id=item)
@ -185,6 +434,10 @@ class Document(models.Model):
if 'description' in keys and d[0].description:
response['description'] = d[0].description
response['index'] = d[0].index
if keys:
for key in list(response):
if key not in keys:
del response[key]
return response
def path(self, name=''):
@ -211,6 +464,9 @@ class Document(models.Model):
return False, 0
def thumbnail(self, size=None, page=None):
if not self.file:
return os.path.join(settings.STATIC_ROOT, 'png/cover.png')
return os.path.join(settings.STATIC_ROOT, 'jpg/list256.jpg')
src = self.file.path
folder = os.path.dirname(src)
if size:
@ -278,12 +534,12 @@ class Document(models.Model):
try:
size = Image.open(image).size
except:
size = [1,1]
size = [1, 1]
else:
if self.width > 0:
size = self.resolution
else:
size = [1,1]
size = [640, 1024]
self.ratio = size[0] / size[1]
return self.ratio
@ -337,6 +593,97 @@ class ItemProperties(models.Model):
if self.description:
self.description_sort = ox.sort_string(self.description)[:512].lower()
else:
self.description_sort = self.document.description_sort
self.description_sort = self.document.sort.description
super(ItemProperties, self).save(*args, **kwargs)
class Access(models.Model):
    """Access record of one user (or ``None``) for one document.

    ``access`` holds the time of the last save and ``accessed`` counts the
    saves; each save also refreshes the aggregated values on the
    document's Sort row.
    """
    class Meta:
        unique_together = ("document", "user")

    access = models.DateTimeField(auto_now=True)
    document = models.ForeignKey(Document, related_name='accessed')
    user = models.ForeignKey(User, null=True, related_name='accessed_documents')
    accessed = models.IntegerField(default=0)

    def save(self, *args, **kwargs):
        """Increment the counter, save, and update the document's Sort row."""
        if not self.accessed:
            self.accessed = 0
        self.accessed += 1
        super(Access, self).save(*args, **kwargs)
        # Sum over all users of this document.
        timesaccessed = Access.objects.filter(document=self.document).aggregate(Sum('accessed'))['accessed__sum']
        Sort.objects.filter(document=self.document).update(timesaccessed=timesaccessed, accessed=self.access)

    def __unicode__(self):
        if self.user:
            return u"%s/%s/%s" % (self.user, self.document, self.access)
        # This model has no ``item`` attribute; the record belongs to
        # ``document`` (the previous ``self.item`` raised AttributeError).
        return u"%s/%s" % (self.document, self.access)
class Facet(models.Model):
    '''
        used for keys that can have multiple values like people, languages etc.
        does not perform too well if total number of items goes above 10k
        this happens for keywords in 0xdb right now
    '''

    class Meta:
        unique_together = ("document", "key", "value")

    document = models.ForeignKey('Document', related_name='facets')
    key = models.CharField(max_length=200, db_index=True)
    value = models.CharField(max_length=1000, db_index=True)
    sortvalue = models.CharField(max_length=1000, db_index=True)

    def __unicode__(self):
        return u"%s=%s" % (self.key, self.value)

    def save(self, *args, **kwargs):
        """Save the facet, deriving ``sortvalue`` from ``value`` when unset.

        ``sortvalue`` is always stored lower-cased.
        """
        if not self.sortvalue:
            self.sortvalue = utils.sort_string(self.value).lower()[:900]
        # Was assigned to the misspelled ``self.sotvalue``, so a sort value
        # supplied by the caller was never actually lower-cased.
        self.sortvalue = self.sortvalue.lower()
        super(Facet, self).save(*args, **kwargs)
# Facet keys: document keys that either offer autocomplete without a
# dedicated autocompleteSortKey, or are flagged as filter.
Document.facet_keys = [
    key['id'] for key in settings.CONFIG['documentKeys']
    if ('autocomplete' in key and 'autocompleteSortKey' not in key)
    or key.get('filter')
]

# Person keys: ids of the configured item keys whose sortType is 'person'.
Document.person_keys = [
    key['id'] for key in settings.CONFIG['itemKeys']
    if key.get('sortType') == 'person'
]
# One searchable value per (document, key); rows are written and deleted
# by Document.update_find().
class Find(models.Model):
class Meta:
# At most one row per document and key.
unique_together = ('document', 'key')
document = models.ForeignKey('Document', related_name='find', db_index=True)
key = models.CharField(max_length=200, db_index=True)
# The index on ``value`` is controlled by the DB_GIN_TRGM setting.
# NOTE(review): presumably a trigram index is created elsewhere when that
# setting is off - confirm.
value = models.TextField(blank=True, db_index=settings.DB_GIN_TRGM)
def __unicode__(self):
return u'%s=%s' % (self.key, self.value)
'''
Sort
table constructed based on info in settings.CONFIG['documentKeys']
'''
# columns every Sort row has; further columns are added below, one per
# document key that is marked sortable or has a numeric/date/enum type
attrs = {
    '__module__': 'document.models',
    'document': models.OneToOneField('Document', related_name='sort', primary_key=True),
    'created': models.DateTimeField(null=True, blank=True, db_index=True),
}
for key in (
    k for k in settings.CONFIG['documentKeys']
    if k.get('sort', False) or k['type'] in ('integer', 'time', 'float', 'date', 'enum')
):
    name = key['id']
    sort_type = key.get('sortType', key['type'])
    # a list-valued type is sorted by its first entry
    sort_type = sort_type[0] if isinstance(sort_type, list) else sort_type
    field = get_sort_field(sort_type)
    if name in attrs:
        # predefined columns above win over configured keys of the same name
        continue
    attrs[name] = field[0](**field[1])

# the model class is built at import time from the collected attributes
Sort = type('Sort', (models.Model,), attrs)
Sort.fields = [f.name for f in Sort._meta.fields]

View file

@ -0,0 +1,72 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
from django.core.management.base import BaseCommand
from django.db import connection, transaction
from django.db.models import fields
from django.conf import settings
from . import models
def update_tables(debug=False):
    '''
    Compare the database table of models.Sort with the model definition,
    run the ALTER TABLE / CREATE INDEX statements needed to make them
    match and, if a column was added or changed, call update_sort() on
    every document.

    debug: when true, print each SQL statement and each updated document
    '''
    sort_model = models.Sort
    table_name = sort_model._meta.db_table
    cursor = connection.cursor()
    introspection = connection.introspection
    db_rows = introspection.get_table_description(cursor, table_name)
    # column name -> description row / Django field class name
    db_fields = {row[0]: row for row in db_rows}
    db_types = {row[0]: introspection.data_types_reverse[row[1]] for row in db_rows}

    model_fields = ['document_id'] + [f.name for f in sort_model._meta.fields]
    rebuild = False

    def drop_column(column):
        return 'ALTER TABLE "%s" DROP COLUMN "%s"' % (table_name, column)

    def add_column(column, column_type):
        # a new column always comes with its index
        return [
            'ALTER TABLE "%s" ADD COLUMN "%s" %s' % (table_name, column, column_type),
            'CREATE INDEX "%s_%s_idx" ON "%s" ("%s")' % (table_name, column,
                                                         table_name, column),
        ]

    # columns that exist in the database but no longer on the model
    changes = [drop_column(name) for name in db_types if name not in model_fields]

    for f in sort_model._meta.fields:
        if f.primary_key:
            continue
        name = f.name
        col_type = f.db_type(connection)
        if name not in db_fields:
            # column missing in the database
            changes.extend(add_column(name, col_type))
            rebuild = True
        elif f.__class__.__name__ != db_types[name]:
            # field class changed: recreate the column
            changes.append(drop_column(name))
            changes.extend(add_column(name, col_type))
            rebuild = True
        elif db_types[name] == 'CharField' and db_fields[name][3] != f.max_length:
            # fourth entry of the description row differs from max_length
            # (presumably the stored column size - confirm)
            changes.append('ALTER TABLE "%s" ALTER COLUMN "%s" TYPE %s' % (table_name, name,
                                                                         col_type))
            null_action = "DROP" if f.null else "SET"
            changes.append('ALTER TABLE "%s" ALTER COLUMN "%s" %s NOT NULL' % (table_name, name,
                                                                              null_action))
            rebuild = True

    if changes:
        print("Updating document sort schema...")
        for sql in changes:
            if debug:
                print(sql)
            cursor.execute(sql)
        transaction.commit()
    if rebuild:
        print("Updating document sort values...")
        ids = [row['id'] for row in models.Document.objects.all().values('id')]
        for document_id in ids:
            d = models.Document.objects.get(pk=document_id)
            if debug:
                print(d)
            d.update_sort()

View file

@ -1,8 +1,11 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import subprocess
from item.utils import sort_title, sort_string, get_by_id
def pdfpages(pdf):
    '''
    Return the 'pages' entry of pdfinfo(pdf) as an int, 0 if it is missing.
    '''
    info = pdfinfo(pdf)
    pages = info.get('pages', '0')
    return int(pages)

View file

@ -3,7 +3,9 @@
from __future__ import division, print_function, absolute_import
import os
import re
from glob import glob
import unicodedata
from six import string_types
import ox
@ -13,7 +15,8 @@ from oxdjango.decorators import login_required_json
from oxdjango.http import HttpFileResponse
from oxdjango.shortcuts import render_to_json_response, get_object_or_404_json, json_response, HttpErrorJson
from django import forms
from django.db.models import Sum
from django.db.models import Count, Sum
from django.conf import settings
from item import utils
from item.models import Item
@ -35,6 +38,13 @@ def get_document_or_404_json(id):
@login_required_json
def addDocument(request, data):
'''
Create new html document
takes {
title: string
}
or
Adds one or more documents to one or more items
takes {
item: string or [string], // one or more item ids (optional)
@ -46,46 +56,54 @@ def addDocument(request, data):
see: editDocument, findDocuments, getDocument, removeDocument, sortDocuments
'''
response = json_response()
if 'ids' in data:
ids = data['ids']
if 'title' in data:
doc = models.Document(user=request.user, extension='html')
doc.data['title'] = data['title']
doc.save()
response = json_response(status=200, text='created')
response['data'] = doc.json(user=request.user)
add_changelog(request, data, doc.get_id())
else:
ids = [data['id']]
if 'item' in data:
if isinstance(data['item'], string_types):
item = Item.objects.get(public_id=data['item'])
if item.editable(request.user):
for id in ids:
document = models.Document.get(id)
document.add(item)
add_changelog(request, data, item.public_id)
else:
response = json_response(status=403, text='permission denied')
if 'ids' in data:
ids = data['ids']
else:
for item in Item.objects.filter(public_id__in=data['item']):
ids = [data['id']]
if 'item' in data:
if isinstance(data['item'], string_types):
item = Item.objects.get(public_id=data['item'])
if item.editable(request.user):
for id in ids:
document = models.Document.get(id)
document.add(item)
add_changelog(request, data, data['item'])
elif 'entity' in data:
if isinstance(data['entity'], string_types):
entity = Entity.get(data['entity'])
if entity.editable(request.user):
for id in ids:
document = models.Document.get(id)
entity.add(document)
add_changelog(request, data, entity.get_id())
add_changelog(request, data, item.public_id)
else:
response = json_response(status=403, text='permission denied')
else:
response = json_response(status=403, text='permission denied')
else:
for entity in Entity.objects.filter(id__in=map(ox.fromAZ, data['entity'])):
for item in Item.objects.filter(public_id__in=data['item']):
if item.editable(request.user):
for id in ids:
document = models.Document.get(id)
document.add(item)
add_changelog(request, data, data['item'])
elif 'entity' in data:
if isinstance(data['entity'], string_types):
entity = Entity.get(data['entity'])
if entity.editable(request.user):
for id in ids:
document = models.Document.get(id)
entity.add(document)
add_changelog(request, data, data['entity'])
else:
response = json_response(status=500, text='invalid request')
add_changelog(request, data, entity.get_id())
else:
response = json_response(status=403, text='permission denied')
else:
for entity in Entity.objects.filter(id__in=map(ox.fromAZ, data['entity'])):
if entity.editable(request.user):
for id in ids:
document = models.Document.get(id)
entity.add(document)
add_changelog(request, data, data['entity'])
else:
response = json_response(status=500, text='invalid request')
return render_to_json_response(response)
actions.register(addDocument, cache=False)
@ -95,7 +113,8 @@ def editDocument(request, data):
Edits data for a document
takes {
id: string, // document id
name: string, // new document name
key: value, // set new data
description: string // new document description
item: string // item id (optional)
}
@ -126,22 +145,26 @@ actions.register(editDocument, cache=False)
def _order_query(qs, sort, item=None):
prefix = 'sort__'
order_by = []
for e in sort:
operator = e['operator']
if operator != '-':
operator = ''
key = {
'name': 'name_sort',
'description': 'descriptions__description_sort'
if item else 'description_sort',
'dimensions': 'dimensions_sort',
if item else 'description',
'index': 'items__itemproperties__index',
#fixme:
'position': 'id',
'name': 'title',
}.get(e['key'], e['key'])
if key == 'resolution':
order_by.append('%swidth'%operator)
order_by.append('%sheight'%operator)
else:
if '__' not in key:
key = "%s%s" % (prefix, key)
order = '%s%s' % (operator, key)
order_by.append(order)
if order_by:
@ -149,6 +172,24 @@ def _order_query(qs, sort, item=None):
qs = qs.distinct()
return qs
def _order_by_group(query):
if 'sort' in query:
if len(query['sort']) == 1 and query['sort'][0]['key'] == 'items':
order_by = query['sort'][0]['operator'] == '-' and '-items' or 'items'
if query['group'] == "year":
secondary = query['sort'][0]['operator'] == '-' and '-sortvalue' or 'sortvalue'
order_by = (order_by, secondary)
elif query['group'] != "keyword":
order_by = (order_by, 'sortvalue')
else:
order_by = (order_by, 'value')
else:
order_by = query['sort'][0]['operator'] == '-' and '-sortvalue' or 'sortvalue'
order_by = (order_by, 'items')
else:
order_by = ('-sortvalue', 'items')
return order_by
def get_item(query):
for c in query.get('conditions', []):
if c.get('key') == 'item':
@ -162,7 +203,7 @@ def parse_query(data, user):
for key in ('keys', 'group', 'file', 'range', 'position', 'positions', 'sort'):
if key in data:
query[key] = data[key]
query['qs'] = models.Document.objects.find(data, user).exclude(name='')
query['qs'] = models.Document.objects.find(data, user)
query['item'] = get_item(data.get('query', {}))
return query
@ -192,7 +233,24 @@ def findDocuments(request, data):
#order
qs = _order_query(query['qs'], query['sort'], query['item'])
response = json_response()
if 'keys' in data:
if 'group' in query:
response['data']['items'] = []
items = 'items'
document_qs = query['qs']
order_by = _order_by_group(query)
qs = models.Facet.objects.filter(key=query['group']).filter(document__id__in=document_qs)
qs = qs.values('value').annotate(items=Count('id')).order_by(*order_by)
if 'positions' in query:
response['data']['positions'] = {}
ids = [j['value'] for j in qs]
response['data']['positions'] = utils.get_positions(ids, query['positions'])
elif 'range' in data:
qs = qs[query['range'][0]:query['range'][1]]
response['data']['items'] = [{'name': i['value'], 'items': i[items]} for i in qs]
else:
response['data']['items'] = qs.count()
elif 'keys' in data:
qs = qs[query['range'][0]:query['range'][1]]
response['data']['items'] = [l.json(data['keys'], request.user, query['item']) for l in qs]
@ -330,23 +388,15 @@ def upload(request):
if 'chunk' in request.FILES:
if file.editable(request.user):
response = process_chunk(request, file.save_chunk)
response['resultUrl'] = request.build_absolute_uri(file.get_absolute_url())
response['resultUrl'] = file.get_absolute_url()
# id is used to select document in dialog after upload
response['id'] = file.get_id()
return render_to_json_response(response)
#init upload
else:
if not file:
created = False
num = 1
_name = name
while not created:
file, created = models.Document.objects.get_or_create(
user=request.user, name=name, extension=extension)
if not created:
num += 1
name = _name + ' [%d]' % num
file.name = name
file = models.Document(user=request.user, extension=extension)
file.data['title'] = name
file.extension = extension
file.uploading = True
file.save()
@ -361,10 +411,81 @@ def upload(request):
file.width = -1
file.pages = -1
file.save()
upload_url = request.build_absolute_uri('/api/upload/document?id=%s' % file.get_id())
upload_url = '/api/upload/document?id=%s' % file.get_id()
return render_to_json_response({
'uploadUrl': upload_url,
'url': request.build_absolute_uri(file.get_absolute_url()),
'url': file.get_absolute_url(),
'result': 1
})
return render_to_json_response(response)
def autocompleteDocuments(request, data):
    '''
    Returns autocomplete strings for a given document key and search string

    takes {
        key: string, // document key
        value: string, // search string
        operator: string, // '=', '==', '^', '$'
        query: object, // document query to limit results, see `find`
        range: [int, int] // range of results to return
    }
    returns {
        items: [string, ...] // list of matching strings
    }
    see: autocomplete, autocompleteEntities
    '''
    if 'range' not in data:
        data['range'] = [0, 10]
    op = data.get('operator', '=')

    # an unknown key falls back to the default ordering instead of failing
    # on None
    key = utils.get_by_id(settings.CONFIG['documentKeys'], data['key']) or {}
    order_by = key.get('autocompleteSort', False)
    if order_by:
        # build the order strings without rewriting the 'operator' values of
        # the shared settings.CONFIG entries
        # NOTE(review): these sort__<key> lookups are applied to a Facet
        # queryset below - confirm they resolve on that model
        order_by = [
            '%ssort__%s' % ('-' if o['operator'] == '-' else '', o['key'])
            for o in order_by
        ]
    else:
        order_by = ['-items']

    qs = models.Facet.objects.filter(key=data['key'])
    value = data.get('value')
    if value:
        value = unicodedata.normalize('NFKD', value).lower()
        # operators not listed here leave the facet values unfiltered
        lookup = {
            '=': 'value__icontains',
            '==': 'value__iexact',
            '^': 'value__istartswith',
            '$': 'value__iendswith',
        }.get(op)
        if lookup:
            qs = qs.filter(**{lookup: value})
    if 'query' in data:
        # limit facets to documents matching the given document query
        document_query = parse_query({'query': data.get('query', {})}, request.user)['qs']
        qs = qs.filter(document__in=document_query)
    # one row per distinct value, with the number of facet rows as `items`
    qs = qs.values('value').annotate(items=Count('id'))
    qs = qs.order_by(*order_by)
    qs = qs[data['range'][0]:data['range'][1]]
    response = json_response({})
    response['data']['items'] = [i['value'] for i in qs]
    return render_to_json_response(response)
actions.register(autocompleteDocuments)