class Migration(SchemaMigration):
    """Schema migration adding ``pages``, ``width`` and ``height`` to documents.

    All three are integer columns on the ``document_document`` table and are
    created with a default of -1.
    """

    def forwards(self, orm):
        """Add the three integer columns, in the order pages, width, height."""
        for column in ('pages', 'width', 'height'):
            # Adding field 'Document.<column>'
            db.add_column('document_document', column,
                          self.gf('django.db.models.fields.IntegerField')(default=-1),
                          keep_default=False)

    def backwards(self, orm):
        """Drop the columns again, in the same order they were added."""
        for column in ('pages', 'width', 'height'):
            # Deleting field 'Document.<column>'
            db.delete_column('document_document', column)

    # Frozen model definitions that accompany this migration; every value is
    # kept exactly as generated.
    models = {
        'auth.group': {
            'Meta': {'object_name': 'Group'},
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'}),
            'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'})
        },
        'auth.permission': {
            'Meta': {'ordering': "('content_type__app_label', 'content_type__model', 'codename')", 'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'},
            'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '50'})
        },
        'auth.user': {
            'Meta': {'object_name': 'User'},
            'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'email': ('django.db.models.fields.EmailField', [], {'max_length': '255', 'blank': 'True'}),
            'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True'}),
            'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}),
            'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}),
            'password': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}),
            'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '255'})
        },
        'contenttypes.contenttype': {
            'Meta': {'ordering': "('name',)", 'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"},
            'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '100'})
        },
        'document.document': {
            'Meta': {'unique_together': "(('user', 'name', 'extension'),)", 'object_name': 'Document'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
            'description_sort': ('django.db.models.fields.CharField', [], {'max_length': '512'}),
            'extension': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'file': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '100', 'null': 'True', 'blank': 'True'}),
            'height': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'items': ('django.db.models.fields.related.ManyToManyField', [], {'related_name': "'documents'", 'symmetrical': 'False', 'through': "orm['document.ItemProperties']", 'to': "orm['item.Item']"}),
            'matches': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'name_sort': ('django.db.models.fields.CharField', [], {'max_length': '255'}),
            'oshash': ('django.db.models.fields.CharField', [], {'max_length': '16', 'unique': 'True', 'null': 'True'}),
            'pages': ('django.db.models.fields.IntegerField', [], {'default': '-1'}),
            'ratio': ('django.db.models.fields.FloatField', [], {'default': '1'}),
            'size': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'uploading': ('django.db.models.fields.BooleanField', [], {'default': 'False'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'files'", 'to': "orm['auth.User']"}),
            'width': ('django.db.models.fields.IntegerField', [], {'default': '-1'})
        },
        'document.itemproperties': {
            'Meta': {'unique_together': "(('item', 'document'),)", 'object_name': 'ItemProperties'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'description': ('django.db.models.fields.TextField', [], {'default': "''"}),
            'document': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'descriptions'", 'to': "orm['document.Document']"}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'index': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['item.Item']"}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'})
        },
        'item.item': {
            'Meta': {'object_name': 'Item'},
            'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}),
            'data': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'external_data': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'groups': ('django.db.models.fields.related.ManyToManyField', [], {'symmetrical': 'False', 'related_name': "'items'", 'blank': 'True', 'to': "orm['auth.Group']"}),
            'icon': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
            'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}),
            'itemId': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '128', 'blank': 'True'}),
            'json': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'level': ('django.db.models.fields.IntegerField', [], {'db_index': 'True'}),
            'modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}),
            'oxdbId': ('django.db.models.fields.CharField', [], {'max_length': '42', 'unique': 'True', 'null': 'True', 'blank': 'True'}),
            'poster': ('django.db.models.fields.files.ImageField', [], {'default': 'None', 'max_length': '100', 'blank': 'True'}),
            'poster_frame': ('django.db.models.fields.FloatField', [], {'default': '-1'}),
            'poster_height': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'poster_source': ('django.db.models.fields.TextField', [], {'blank': 'True'}),
            'poster_width': ('django.db.models.fields.IntegerField', [], {'default': '0'}),
            'rendered': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'db_index': 'True'}),
            'stream_aspect': ('django.db.models.fields.FloatField', [], {'default': '1.3333333333333333'}),
            'stream_info': ('ox.django.fields.DictField', [], {'default': '{}'}),
            'torrent': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '1000', 'blank': 'True'}),
            'user': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'items'", 'null': 'True', 'to': "orm['auth.User']"})
        }
    }

    complete_apps = ['document']
@property
def resolution(self):
    """Return the stored dimensions as a two-element ``[width, height]`` list.

    The values are read straight from ``self.width`` and ``self.height``;
    no validation or conversion is applied.
    """
    width, height = self.width, self.height
    return [width, height]
def get_info(self):
    """Fill in ``pages`` / ``width`` / ``height`` from the stored file.

    -1 is the "not determined yet" value for all three fields, so each
    branch only does work while its field still holds -1:

    * PDFs get their page count from ``utils.pdfpages``.
    * Every other extension is opened as an image to read its pixel size.
    """
    if self.extension == 'pdf':
        if self.pages == -1:
            # A PDF has no pixel size. Both dimensions must stay at the -1
            # sentinel: a width of 1 would pass the ``width > 0`` test in
            # get_ratio() and bypass the ``== -1`` fallback that the
            # thumbnail code applies to max(width, height).
            self.width = -1
            self.height = -1
            self.pages = utils.pdfpages(self.file.path)
    elif self.width == -1:
        self.pages = -1
        self.width, self.height = Image.open(self.file.path).size

def get_ratio(self):
    """Compute ``self.ratio`` (width divided by height) for the document.

    The stored resolution is used only when both dimensions are positive.
    Otherwise the size is read from the image file (for PDFs: from the
    generated thumbnail). If that fails, or the height is not positive,
    the ratio falls back to 1.
    """
    if self.extension == 'pdf':
        self.make_thumbnail()
        image = self.thumbnail()
    else:
        image = self.file.path
    # Require both values: rows written with width=1 / height=-1 would
    # otherwise produce a ratio of -1.
    if self.width > 0 and self.height > 0:
        size = [self.width, self.height]
    else:
        try:
            size = Image.open(image).size
        except Exception:
            # Best effort: an unreadable image must not break the caller.
            size = [1, 1]
    if size[1] > 0:
        # float() forces true division even where ``/`` is integer division.
        self.ratio = float(size[0]) / size[1]
    else:
        self.ratio = 1.0
line.split(':') + key = parts[0].lower().strip() + if key: + data[key] = ':'.join(parts[1:]).strip() + return data diff --git a/pandora/document/views.py b/pandora/document/views.py index 86c3e5f22..1a511d1bc 100644 --- a/pandora/document/views.py +++ b/pandora/document/views.py @@ -86,8 +86,12 @@ def _order_query(qs, sort): 'name': 'name_sort', 'description': 'description_sort', }.get(e['key'], e['key']) - order = '%s%s' % (operator, key) - order_by.append(order) + if key == 'resolution': + order_by.append('%swidth'%operator) + order_by.append('%sheight'%operator) + else: + order = '%s%s' % (operator, key) + order_by.append(order) if order_by: qs = qs.order_by(*order_by) qs = qs.distinct() diff --git a/vm/firstboot.sh b/vm/firstboot.sh index e15c02578..2c60b6f11 100755 --- a/vm/firstboot.sh +++ b/vm/firstboot.sh @@ -46,6 +46,7 @@ apt-get install -y \ libav-tools \ ffmpeg2theora \ imagemagick \ + poppler-utils \ ipython \ postfix \ postgresql \