add resolution/pages to documents, use poppler-utils to extract pdf pages.
This commit is contained in:
parent
5705c9e74d
commit
ff13a30639
6 changed files with 198 additions and 10 deletions
2
README
2
README
|
@ -30,7 +30,7 @@ To run pan.do/ra you need to install and setup:
|
|||
python-gst0.10 gstreamer0.10-plugins-good gstreamer0.10-plugins-bad \
|
||||
postgresql postgresql-contrib rabbitmq-server \
|
||||
ffmpeg2theora libav-tools libavcodec-extra-53 \
|
||||
python-ox oxframe imagemagick
|
||||
python-ox oxframe imagemagick poppler-utils
|
||||
|
||||
|
||||
* Prepare Environment
|
||||
|
|
133
pandora/document/migrations/0005_add_pages_resolution.py
Normal file
133
pandora/document/migrations/0005_add_pages_resolution.py
Normal file
|
@ -0,0 +1,133 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import datetime
|
||||
from south.db import db
|
||||
from south.v2 import SchemaMigration
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(SchemaMigration):
|
||||
|
||||
def forwards(self, orm):
    """Add the ``pages``, ``width`` and ``height`` columns to ``document_document``.

    Each column is built from an ``IntegerField`` with ``default=-1`` and
    added with ``keep_default=False``.
    """
    # Columns are added in the same order as the generated migration.
    for column in ('pages', 'width', 'height'):
        # Adding field 'Document.<column>'
        field = self.gf('django.db.models.fields.IntegerField')(default=-1)
        db.add_column('document_document', column, field, keep_default=False)
|
||||
|
||||
|
||||
def backwards(self, orm):
    """Drop the ``pages``, ``width`` and ``height`` columns from ``document_document``."""
    for column in ('pages', 'width', 'height'):
        # Deleting field 'Document.<column>'
        db.delete_column('document_document', column)
|
||||
|
||||
|
||||
models = {
|
||||
'auth.group': {
|
||||
'Meta': {'object_name': 'Group'},
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
|
||||
'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
|
||||
},
|
||||
'auth.permission': {
|
||||
'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
|
||||
'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
|
||||
},
|
||||
'auth.user': {
|
||||
'Meta': {'object_name': 'User'},
|
||||
'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}),
|
||||
'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
|
||||
'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
|
||||
'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
|
||||
'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
|
||||
'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
|
||||
'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
|
||||
'password': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
|
||||
'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
|
||||
'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
|
||||
},
|
||||
'contenttypes.contenttype': {
|
||||
'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
|
||||
'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
|
||||
},
|
||||
'document.document': {
|
||||
'Meta': {'unique_together': "(('user', 'name', 'extension'),)", 'object_name': 'Document'},
|
||||
'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
|
||||
'description_sort': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
|
||||
'extension': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
|
||||
'file': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '100', 'null': 'True', 'blank': 'True'}),
|
||||
'height': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'items': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'documents'", 'symmetrical': 'False', 'through': "orm['document.ItemProperties']", 'to': "orm['item.Item']"}),
|
||||
'matches': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
|
||||
'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
|
||||
'name_sort': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
|
||||
'oshash': ('django.db.models.fields.CharField', [], {'max_length': '16', 'unique': 'True', 'null': 'True'}),
|
||||
'pages': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
|
||||
'ratio': ('django.db.models.fields.FloatField', [], {'default': '1'}),
|
||||
'size': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'uploading': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
|
||||
'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'files'", 'to': "orm['auth.User']"}),
|
||||
'width': ('django.db.models.fields.IntegerField', [], {'default': '-1'})
|
||||
},
|
||||
'document.itemproperties': {
|
||||
'Meta': {'unique_together': "(('item', 'document'),)", 'object_name': 'ItemProperties'},
|
||||
'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
|
||||
'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'descriptions'", 'to': "orm['document.Document']"}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'index': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['item.Item']"}),
|
||||
'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'})
|
||||
},
|
||||
'item.item': {
|
||||
'Meta': {'object_name': 'Item'},
|
||||
'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
|
||||
'data': ('ox.django.fields.DictField', [], {'default': '{}'}),
|
||||
'external_data': ('ox.django.fields.DictField', [], {'default': '{}'}),
|
||||
'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "'items'", 'blank': 'True', 'to': "orm['auth.Group']"}),
|
||||
'icon': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
|
||||
'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
|
||||
'itemId': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'blank': 'True'}),
|
||||
'json': ('ox.django.fields.DictField', [], {'default': '{}'}),
|
||||
'level': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
|
||||
'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
|
||||
'oxdbId': ('django.db.models.fields.CharField', [], {'max_length': '42', 'unique': 'True', 'null': 'True', 'blank': 'True'}),
|
||||
'poster': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
|
||||
'poster_frame': ('django.db.models.fields.FloatField', [], {'default': '-1'}),
|
||||
'poster_height': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'poster_source': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
|
||||
'poster_width': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
|
||||
'rendered': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
|
||||
'stream_aspect': ('django.db.models.fields.FloatField', [], {'default': '1.3333333333333333'}),
|
||||
'stream_info': ('ox.django.fields.DictField', [], {'default': '{}'}),
|
||||
'torrent': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '1000', 'blank': 'True'}),
|
||||
'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'items'", 'null': 'True', 'to': "orm['auth.User']"})
|
||||
}
|
||||
}
|
||||
|
||||
complete_apps = ['document']
|
|
@ -18,6 +18,7 @@ from item.models import Item
|
|||
from archive.extract import resize_image
|
||||
|
||||
import managers
|
||||
import utils
|
||||
|
||||
|
||||
class Document(models.Model):
|
||||
|
@ -34,6 +35,9 @@ class Document(models.Model):
|
|||
size = models.IntegerField(default=0)
|
||||
matches = models.IntegerField(default=0)
|
||||
ratio = models.FloatField(default=1)
|
||||
pages = models.IntegerField(default=-1)
|
||||
width = models.IntegerField(default=-1)
|
||||
height = models.IntegerField(default=-1)
|
||||
description = models.TextField(default="")
|
||||
oshash = models.CharField(max_length=16, unique=True, null=True)
|
||||
|
||||
|
@ -53,11 +57,13 @@ class Document(models.Model):
|
|||
self.size = self.file.size
|
||||
if self.extension == 'pdf' and not os.path.exists(self.thumbnail()):
|
||||
self.make_thumbnail()
|
||||
self.get_info()
|
||||
|
||||
self.name_sort = ox.sort_string(self.name or u'')[:255].lower()
|
||||
self.description_sort = ox.sort_string(self.description or u'')[:512].lower()
|
||||
|
||||
super(Document, self).save(*args, **kwargs)
|
||||
self.update_matches()
|
||||
|
||||
def __unicode__(self):
|
||||
return self.get_id()
|
||||
|
@ -67,6 +73,7 @@ class Document(models.Model):
|
|||
if created:
|
||||
p.index = ItemProperties.objects.filter(item=item).aggregate(Max('index'))['index__max'] + 1
|
||||
p.save()
|
||||
p.document.update_matches()
|
||||
|
||||
def remove(self, item):
|
||||
ItemProperties.objects.filter(item=item, document=self).delete()
|
||||
|
@ -110,6 +117,10 @@ class Document(models.Model):
|
|||
p.description = ox.sanitize_html(data['description'])
|
||||
p.save()
|
||||
|
||||
@property
def resolution(self):
    """Return the stored dimensions as a two-item list ``[width, height]``."""
    width, height = self.width, self.height
    return [width, height]
|
||||
|
||||
def json(self, keys=None, user=None, item=None):
|
||||
if not keys:
|
||||
keys=[
|
||||
|
@ -123,6 +134,10 @@ class Document(models.Model):
|
|||
'ratio',
|
||||
'user'
|
||||
]
|
||||
if self.extension == 'pdf':
|
||||
keys.append('pages')
|
||||
else:
|
||||
keys.append('resolution')
|
||||
response = {}
|
||||
_map = {
|
||||
}
|
||||
|
@ -162,6 +177,7 @@ class Document(models.Model):
|
|||
f.write(chunk.read())
|
||||
if done:
|
||||
self.uploading = False
|
||||
self.get_info()
|
||||
self.get_ratio()
|
||||
self.oshash = ox.oshash(self.file.path)
|
||||
self.save()
|
||||
|
@ -178,6 +194,8 @@ class Document(models.Model):
|
|||
else:
|
||||
path = src
|
||||
if os.path.exists(src) and not os.path.exists(path):
|
||||
image_size = max(self.width, self.height)
|
||||
if image_size == -1:
|
||||
image_size = max(*Image.open(src).size)
|
||||
if size > image_size:
|
||||
path = src
|
||||
|
@ -193,12 +211,25 @@ class Document(models.Model):
|
|||
p = subprocess.Popen(cmd)
|
||||
p.wait()
|
||||
|
||||
def get_info(self):
    """Fill in ``pages``, ``width`` and ``height`` while they are still unset (-1).

    PDFs (``extension == 'pdf'``): while ``pages`` is -1, width and height
    are reset to -1 and the page count is read with ``utils.pdfpages``.

    Other files: while ``width`` is -1, ``pages`` is set to -1 and the
    size is read from the file with ``Image.open``.

    Only attributes on the instance are updated; this method does not
    call ``save()`` itself.
    """
    if self.extension == 'pdf':
        if self.pages == -1:
            # Both dimensions must carry the -1 "unknown" sentinel:
            # get_ratio() trusts any width > 0, so a positive placeholder
            # here would be used as a real resolution.
            self.width = -1
            self.height = -1
            self.pages = utils.pdfpages(self.file.path)
    elif self.width == -1:
        self.pages = -1
        self.width, self.height = Image.open(self.file.path).size
|
||||
|
||||
def get_ratio(self):
|
||||
if self.extension == 'pdf':
|
||||
self.make_thumbnail()
|
||||
image = self.thumbnail()
|
||||
else:
|
||||
image = self.file.path
|
||||
if self.width > 0:
|
||||
size = self.resolution
|
||||
else:
|
||||
try:
|
||||
size = Image.open(image).size
|
||||
except:
|
||||
|
@ -213,7 +244,7 @@ class Document(models.Model):
|
|||
url = unquote(urls[0])
|
||||
if url != urls[0]:
|
||||
urls.append(url)
|
||||
matches = 0
|
||||
matches = self.items.count()
|
||||
for url in urls:
|
||||
matches += annotation.models.Annotation.objects.filter(value__contains=url).count()
|
||||
matches += item.models.Item.objects.filter(data__contains=url).count()
|
||||
|
|
19
pandora/document/utils.py
Normal file
19
pandora/document/utils.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
|
||||
import subprocess
|
||||
|
||||
def pdfpages(pdf):
    """Return the ``pages`` entry of ``pdfinfo(pdf)`` as an int (0 if the entry is missing)."""
    info = pdfinfo(pdf)
    pages = info.get('pages', '0')
    return int(pages)
|
||||
|
||||
def pdfinfo(pdf):
    """Run the ``pdfinfo`` command on *pdf* and return its output as a dict.

    Each ``key: value`` line written to stdout becomes one entry. Keys are
    stripped and lower-cased, values are stripped. Only the first colon
    separates key from value, so values may contain colons themselves.
    Lines with an empty key are skipped.

    stderr is captured and discarded and the exit status is not checked;
    if nothing is written to stdout the result is an empty dict.
    """
    cmd = ['pdfinfo', pdf]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _stderr = p.communicate()
    # communicate() returns bytes on Python 3; decode so the str
    # operations below work there too (no-op on Python 2 str output).
    if not isinstance(stdout, str):
        stdout = stdout.decode('utf-8', 'replace')
    data = {}
    for line in stdout.strip().split('\n'):
        key, _sep, value = line.partition(':')
        key = key.lower().strip()
        if key:
            data[key] = value.strip()
    return data
|
|
@ -86,6 +86,10 @@ def _order_query(qs, sort):
|
|||
'name': 'name_sort',
|
||||
'description': 'description_sort',
|
||||
}.get(e['key'], e['key'])
|
||||
if key == 'resolution':
|
||||
order_by.append('%swidth'%operator)
|
||||
order_by.append('%sheight'%operator)
|
||||
else:
|
||||
order = '%s%s' % (operator, key)
|
||||
order_by.append(order)
|
||||
if order_by:
|
||||
|
|
|
@ -46,6 +46,7 @@ apt-get install -y \
|
|||
libav-tools \
|
||||
ffmpeg2theora \
|
||||
imagemagick \
|
||||
poppler-utils \
|
||||
ipython \
|
||||
postfix \
|
||||
postgresql \
|
||||
|
|
Loading…
Reference in a new issue