Add resolution/pages to documents; use poppler-utils (pdfinfo) to read the page count of PDFs.
This commit is contained in:
parent
5705c9e74d
commit
ff13a30639
6 changed files with 198 additions and 10 deletions
2
README
2
README
|
@ -30,7 +30,7 @@ To run pan.do/ra you need to install and setup:
|
||||||
python-gst0.10 gstreamer0.10-plugins-good gstreamer0.10-plugins-bad \
|
python-gst0.10 gstreamer0.10-plugins-good gstreamer0.10-plugins-bad \
|
||||||
postgresql postgresql-contrib rabbitmq-server \
|
postgresql postgresql-contrib rabbitmq-server \
|
||||||
ffmpeg2theora libav-tools libavcodec-extra-53 \
|
ffmpeg2theora libav-tools libavcodec-extra-53 \
|
||||||
python-ox oxframe imagemagick
|
python-ox oxframe imagemagick poppler-utils
|
||||||
|
|
||||||
|
|
||||||
* Prepare Environment
|
* Prepare Environment
|
||||||
|
|
133
pandora/document/migrations/0005_add_pages_resolution.py
Normal file
133
pandora/document/migrations/0005_add_pages_resolution.py
Normal file
|
@ -0,0 +1,133 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import datetime
|
||||||
|
from south.db import db
|
||||||
|
from south.v2 import SchemaMigration
|
||||||
|
from django.db import models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(SchemaMigration):
    """South schema migration for the 'document' app.

    Adds the integer columns 'pages', 'width' and 'height' (each created with
    default -1) to the 'document_document' table; the backwards migration
    drops them again.
    """

    def forwards(self, orm):
        # Adding fields 'Document.pages', 'Document.width', 'Document.height'
        # (in that order); all three share the same field definition.
        for column in ('pages', 'width', 'height'):
            field = self.gf('django.db.models.fields.IntegerField')(default=-1)
            db.add_column('document_document', column, field,
                          keep_default=False)

    def backwards(self, orm):
        # Deleting fields 'Document.pages', 'Document.width', 'Document.height'
        # (same order as they were added).
        for column in ('pages', 'width', 'height'):
            db.delete_column('document_document', column)

    # Frozen model definitions as of this migration.
    models = {
        'auth.group': {
            'Meta': {'object_name': 'Group'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'Meta': {'object_name': 'User'},
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
        },
        'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'document.document': {
            'Meta': {'unique_together': "(('user', 'name', 'extension'),)", 'object_name': 'Document'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
            'description_sort': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
            'extension': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'file': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '100', 'null': 'True', 'blank': 'True'}),
            'height': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'items': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'documents'", 'symmetrical': 'False', 'through': "orm['document.ItemProperties']", 'to': "orm['item.Item']"}),
            'matches': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'name_sort': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'oshash': ('django.db.models.fields.CharField', [], {'max_length': '16', 'unique': 'True', 'null': 'True'}),
            'pages': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'ratio': ('django.db.models.fields.FloatField', [], {'default': '1'}),
            'size': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'uploading': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'files'", 'to': "orm['auth.User']"}),
            'width': ('django.db.models.fields.IntegerField', [], {'default': '-1'})
        },
        'document.itemproperties': {
            'Meta': {'unique_together': "(('item', 'document'),)", 'object_name': 'ItemProperties'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
            'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'descriptions'", 'to': "orm['document.Document']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['item.Item']"}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'})
        },
        'item.item': {
            'Meta': {'object_name': 'Item'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'data': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'external_data': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "'items'", 'blank': 'True', 'to': "orm['auth.Group']"}),
            'icon': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'itemId': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'blank': 'True'}),
            'json': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'level': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'oxdbId': ('django.db.models.fields.CharField', [], {'max_length': '42', 'unique': 'True', 'null': 'True', 'blank': 'True'}),
            'poster': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
            'poster_frame': ('django.db.models.fields.FloatField', [], {'default': '-1'}),
            'poster_height': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'poster_source': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'poster_width': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'rendered': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'stream_aspect': ('django.db.models.fields.FloatField', [], {'default': '1.3333333333333333'}),
            'stream_info': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'torrent': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '1000', 'blank': 'True'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'items'", 'null': 'True', 'to': "orm['auth.User']"})
        }
    }

    complete_apps = ['document']
|
|
@ -18,6 +18,7 @@ from item.models import Item
|
||||||
from archive.extract import resize_image
|
from archive.extract import resize_image
|
||||||
|
|
||||||
import managers
|
import managers
|
||||||
|
import utils
|
||||||
|
|
||||||
|
|
||||||
class Document(models.Model):
|
class Document(models.Model):
|
||||||
|
@ -34,6 +35,9 @@ class Document(models.Model):
|
||||||
size = models.IntegerField(default=0)
|
size = models.IntegerField(default=0)
|
||||||
matches = models.IntegerField(default=0)
|
matches = models.IntegerField(default=0)
|
||||||
ratio = models.FloatField(default=1)
|
ratio = models.FloatField(default=1)
|
||||||
|
pages = models.IntegerField(default=-1)
|
||||||
|
width = models.IntegerField(default=-1)
|
||||||
|
height = models.IntegerField(default=-1)
|
||||||
description = models.TextField(default="")
|
description = models.TextField(default="")
|
||||||
oshash = models.CharField(max_length=16, unique=True, null=True)
|
oshash = models.CharField(max_length=16, unique=True, null=True)
|
||||||
|
|
||||||
|
@ -53,11 +57,13 @@ class Document(models.Model):
|
||||||
self.size = self.file.size
|
self.size = self.file.size
|
||||||
if self.extension == 'pdf' and not os.path.exists(self.thumbnail()):
|
if self.extension == 'pdf' and not os.path.exists(self.thumbnail()):
|
||||||
self.make_thumbnail()
|
self.make_thumbnail()
|
||||||
|
self.get_info()
|
||||||
|
|
||||||
self.name_sort = ox.sort_string(self.name or u'')[:255].lower()
|
self.name_sort = ox.sort_string(self.name or u'')[:255].lower()
|
||||||
self.description_sort = ox.sort_string(self.description or u'')[:512].lower()
|
self.description_sort = ox.sort_string(self.description or u'')[:512].lower()
|
||||||
|
|
||||||
super(Document, self).save(*args, **kwargs)
|
super(Document, self).save(*args, **kwargs)
|
||||||
|
self.update_matches()
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
return self.get_id()
|
return self.get_id()
|
||||||
|
@ -67,6 +73,7 @@ class Document(models.Model):
|
||||||
if created:
|
if created:
|
||||||
p.index = ItemProperties.objects.filter(item=item).aggregate(Max('index'))['index__max'] + 1
|
p.index = ItemProperties.objects.filter(item=item).aggregate(Max('index'))['index__max'] + 1
|
||||||
p.save()
|
p.save()
|
||||||
|
p.document.update_matches()
|
||||||
|
|
||||||
def remove(self, item):
|
def remove(self, item):
|
||||||
ItemProperties.objects.filter(item=item, document=self).delete()
|
ItemProperties.objects.filter(item=item, document=self).delete()
|
||||||
|
@ -110,9 +117,13 @@ class Document(models.Model):
|
||||||
p.description = ox.sanitize_html(data['description'])
|
p.description = ox.sanitize_html(data['description'])
|
||||||
p.save()
|
p.save()
|
||||||
|
|
||||||
|
@property
def resolution(self):
    """Return the stored dimensions as a two-element list ``[width, height]``."""
    dimensions = [self.width, self.height]
    return dimensions
|
||||||
|
|
||||||
def json(self, keys=None, user=None, item=None):
|
def json(self, keys=None, user=None, item=None):
|
||||||
if not keys:
|
if not keys:
|
||||||
keys=[
|
keys=[
|
||||||
'description',
|
'description',
|
||||||
'editable',
|
'editable',
|
||||||
'id',
|
'id',
|
||||||
|
@ -123,6 +134,10 @@ class Document(models.Model):
|
||||||
'ratio',
|
'ratio',
|
||||||
'user'
|
'user'
|
||||||
]
|
]
|
||||||
|
if self.extension == 'pdf':
|
||||||
|
keys.append('pages')
|
||||||
|
else:
|
||||||
|
keys.append('resolution')
|
||||||
response = {}
|
response = {}
|
||||||
_map = {
|
_map = {
|
||||||
}
|
}
|
||||||
|
@ -162,6 +177,7 @@ class Document(models.Model):
|
||||||
f.write(chunk.read())
|
f.write(chunk.read())
|
||||||
if done:
|
if done:
|
||||||
self.uploading = False
|
self.uploading = False
|
||||||
|
self.get_info()
|
||||||
self.get_ratio()
|
self.get_ratio()
|
||||||
self.oshash = ox.oshash(self.file.path)
|
self.oshash = ox.oshash(self.file.path)
|
||||||
self.save()
|
self.save()
|
||||||
|
@ -178,7 +194,9 @@ class Document(models.Model):
|
||||||
else:
|
else:
|
||||||
path = src
|
path = src
|
||||||
if os.path.exists(src) and not os.path.exists(path):
|
if os.path.exists(src) and not os.path.exists(path):
|
||||||
image_size = max(*Image.open(src).size)
|
image_size = max(self.width, self.height)
|
||||||
|
if image_size == -1:
|
||||||
|
image_size = max(*Image.open(src).size)
|
||||||
if size > image_size:
|
if size > image_size:
|
||||||
path = src
|
path = src
|
||||||
else:
|
else:
|
||||||
|
@ -193,16 +211,29 @@ class Document(models.Model):
|
||||||
p = subprocess.Popen(cmd)
|
p = subprocess.Popen(cmd)
|
||||||
p.wait()
|
p.wait()
|
||||||
|
|
||||||
|
def get_info(self):
    """Fill in ``pages`` (PDFs) or ``width``/``height`` (other files).

    Values are only computed while they still hold -1, the default of the
    ``pages``, ``width`` and ``height`` fields; already populated values are
    left untouched. Nothing is saved here - the caller is expected to save
    the instance.

    For PDFs the page count comes from ``utils.pdfpages`` and width/height
    are reset to -1; for every other extension the file is opened with
    ``Image.open`` to read its size and ``pages`` is reset to -1.
    """
    if self.extension == 'pdf':
        if self.pages == -1:
            # Both dimensions must stay at -1 for PDFs: get_ratio() trusts
            # self.resolution whenever width > 0, so a positive width paired
            # with height == -1 would produce a bogus ratio instead of
            # falling back to measuring the image file.
            self.width = -1
            self.height = -1
            self.pages = utils.pdfpages(self.file.path)
    elif self.width == -1:
        self.pages = -1
        self.width, self.height = Image.open(self.file.path).size
|
||||||
|
|
||||||
def get_ratio(self):
|
def get_ratio(self):
|
||||||
if self.extension == 'pdf':
|
if self.extension == 'pdf':
|
||||||
self.make_thumbnail()
|
self.make_thumbnail()
|
||||||
image = self.thumbnail()
|
image = self.thumbnail()
|
||||||
else:
|
else:
|
||||||
image = self.file.path
|
image = self.file.path
|
||||||
try:
|
if self.width > 0:
|
||||||
size = Image.open(image).size
|
size = self.resolution
|
||||||
except:
|
else:
|
||||||
size = [1,1]
|
try:
|
||||||
|
size = Image.open(image).size
|
||||||
|
except:
|
||||||
|
size = [1,1]
|
||||||
self.ratio = size[0] / size[1]
|
self.ratio = size[0] / size[1]
|
||||||
|
|
||||||
def update_matches(self):
|
def update_matches(self):
|
||||||
|
@ -213,7 +244,7 @@ class Document(models.Model):
|
||||||
url = unquote(urls[0])
|
url = unquote(urls[0])
|
||||||
if url != urls[0]:
|
if url != urls[0]:
|
||||||
urls.append(url)
|
urls.append(url)
|
||||||
matches = 0
|
matches = self.items.count()
|
||||||
for url in urls:
|
for url in urls:
|
||||||
matches += annotation.models.Annotation.objects.filter(value__contains=url).count()
|
matches += annotation.models.Annotation.objects.filter(value__contains=url).count()
|
||||||
matches += item.models.Item.objects.filter(data__contains=url).count()
|
matches += item.models.Item.objects.filter(data__contains=url).count()
|
||||||
|
|
19
pandora/document/utils.py
Normal file
19
pandora/document/utils.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# vi:si:et:sw=4:sts=4:ts=4
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
def pdfpages(pdf):
    """Return the page count that ``pdfinfo`` reports for the file *pdf*.

    Falls back to 0 when the parsed ``pdfinfo`` output has no 'pages' entry.
    """
    info = pdfinfo(pdf)
    page_count = info.get('pages', '0')
    return int(page_count)
|
||||||
|
|
||||||
|
def pdfinfo(pdf):
    """Run the ``pdfinfo`` command on *pdf* and return its output as a dict.

    Every line of standard output is split at its first colon: the text
    before it, lower-cased and stripped, becomes the key and the stripped
    remainder becomes the value. Lines whose key is empty are ignored.
    Standard error is captured but not used.
    """
    process = subprocess.Popen(['pdfinfo', pdf],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, _errors = process.communicate()
    info = {}
    for row in output.strip().split('\n'):
        # Only the first colon separates key and value; later colons
        # (e.g. inside timestamps) stay part of the value.
        name, _sep, value = row.partition(':')
        name = name.lower().strip()
        if not name:
            continue
        info[name] = value.strip()
    return info
|
|
@ -86,8 +86,12 @@ def _order_query(qs, sort):
|
||||||
'name': 'name_sort',
|
'name': 'name_sort',
|
||||||
'description': 'description_sort',
|
'description': 'description_sort',
|
||||||
}.get(e['key'], e['key'])
|
}.get(e['key'], e['key'])
|
||||||
order = '%s%s' % (operator, key)
|
if key == 'resolution':
|
||||||
order_by.append(order)
|
order_by.append('%swidth'%operator)
|
||||||
|
order_by.append('%sheight'%operator)
|
||||||
|
else:
|
||||||
|
order = '%s%s' % (operator, key)
|
||||||
|
order_by.append(order)
|
||||||
if order_by:
|
if order_by:
|
||||||
qs = qs.order_by(*order_by)
|
qs = qs.order_by(*order_by)
|
||||||
qs = qs.distinct()
|
qs = qs.distinct()
|
||||||
|
|
|
@ -46,6 +46,7 @@ apt-get install -y \
|
||||||
libav-tools \
|
libav-tools \
|
||||||
ffmpeg2theora \
|
ffmpeg2theora \
|
||||||
imagemagick \
|
imagemagick \
|
||||||
|
poppler-utils \
|
||||||
ipython \
|
ipython \
|
||||||
postfix \
|
postfix \
|
||||||
postgresql \
|
postgresql \
|
||||||
|
|
Loading…
Reference in a new issue