def highlight_page(self, page, query, size):
    """Render one PDF page as a JPEG with all matches of *query* highlighted.

    page:  1-based page number (anything ``int()`` accepts).
    query: text to search for on that page; each matching character gets a
           translucent yellow rectangle drawn over it.
    size:  length in pixels of the longer side of the returned image.

    Returns the open ``tempfile.NamedTemporaryFile`` whose path holds the
    JPEG data. The file is deleted when the returned object is closed.
    """
    import pypdfium2 as pdfium
    from PIL import Image
    from PIL import ImageDraw

    TINT_COLOR = (255, 255, 0)
    TRANSPARENCY = .45
    OPACITY = int(255 * TRANSPARENCY)
    # Render scale; character boxes are multiplied by the same factor below
    # so they line up with the rendered bitmap.
    scale = 150/72

    pdf = pdfium.PdfDocument(self.file.path)
    # Keep the `page` argument intact; use a separate name for the page object.
    pdf_page = pdf[int(page) - 1]

    bitmap = pdf_page.render(scale=scale, rotation=0)
    img = bitmap.to_pil().convert('RGBA')
    # Fully transparent layer; highlights are drawn here and composited
    # afterwards so the page content stays visible underneath.
    overlay = Image.new('RGBA', img.size, TINT_COLOR + (0,))
    draw = ImageDraw.Draw(overlay)

    # Nothing to highlight for an empty query; just return the plain page.
    if query:
        textpage = pdf_page.get_textpage()
        search = textpage.search(query)
        result = search.get_next()
        while result:
            pos, steps = result
            # NOTE(review): one character more than the reported match length
            # is highlighted (kept from the original) - confirm intentional.
            steps += 1
            for index in range(pos, pos + steps):
                box = [b * scale for b in textpage.get_charbox(index)]
                # The y axis is flipped relative to the image height here.
                tl = (box[0], img.size[1] - box[3])
                br = (box[2], img.size[1] - box[1])
                draw.rectangle((tl, br), fill=TINT_COLOR + (OPACITY,))
            result = search.get_next()

    img = Image.alpha_composite(img, overlay).convert("RGB")

    # Fit the longer side to `size`, preserving the aspect ratio.
    aspect = img.size[0] / img.size[1]
    if img.size[0] >= img.size[1]:
        width = size
        height = int(size / aspect)
    else:
        # Portrait: the width shrinks by the aspect ratio. The previous
        # `size / aspect` produced a width larger than `size`.
        width = int(size * aspect)
        height = size
    # Never ask Pillow for a zero-pixel dimension on extreme aspect ratios.
    width = max(1, width)
    height = max(1, height)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((width, height), Image.LANCZOS)

    # Create the temp file only once there is something to write into it.
    jpg = tempfile.NamedTemporaryFile(suffix='.jpg')
    img.save(jpg.name, quality=72)
    return jpg
@@ yt-dlp>=2022.3.8.2 python-memcached elasticsearch<8 future +pytz +pypdfium2 diff --git a/static/js/collection.js b/static/js/collection.js index c3c29167b..1dc50e734 100644 --- a/static/js/collection.js +++ b/static/js/collection.js @@ -153,7 +153,8 @@ pandora.ui.collection = function() { id: data.id, pages: data.pages, query: ui.findDocuments, - ratio: data.ratio + ratio: data.ratio, + title: data.title } } }; @@ -164,7 +165,7 @@ pandora.ui.collection = function() { }), callback); return Ox.clone(data, true); }, - keys: ['id', 'pages', 'title', 'ratio', 'modified'], + keys: ['pages'].concat(keys), selected: ui.listSelection, size: 192, sort: ui.collectionSort.concat([ diff --git a/static/js/documentPages.js b/static/js/documentPages.js index 9ec8b49a6..6be22490e 100644 --- a/static/js/documentPages.js +++ b/static/js/documentPages.js @@ -25,8 +25,12 @@ pandora.ui.documentPages = function(options) { self.width = self.options.ratio > 1 ? self.size : Math.round(self.size * self.options.ratio); self.height = self.options.ratio > 1 ? 
Math.round(self.size / self.options.ratio) : self.size; - function renderPage(page) { + function renderPage(page, query) { + self.pages.push(page) var url = `/documents/${self.options.id}/${self.size}p${page}.jpg` + if (query) { + url += '?q=' + encodeURIComponent(query) + } var $item = Ox.IconItem({ imageHeight: self.height, imageWidth: self.width, @@ -45,12 +49,11 @@ pandora.ui.documentPages = function(options) { that.append($item); } - function renderPages(pages) { - console.log('renderPages', pages, self.options.pages) + function renderPages(pages, query) { + self.pages = [] if (pages) { - console.log('renderPages', pages) pages.forEach(page => { - renderPage(page.page) + renderPage(page.page, query) }) } else { if (self.options.pages > 1) { @@ -78,7 +81,7 @@ pandora.ui.documentPages = function(options) { range: [0, 100], keys: ['page'] }, function(result) { - renderPages(result.data.items) + renderPages(result.data.items, pandora.getFulltextQuery()) }) } else { renderPages() @@ -97,7 +100,31 @@ pandora.ui.documentPages = function(options) { } function singleclick(data) { - // .. 
'use strict';

/**
 * Dialog that shows one rendered document page in an Ox.ImageViewer.
 *
 * Options read by this dialog:
 *   document    document id, used to build the image and PDF URLs
 *   page        page currently shown
 *   pages       list of pages the previous/next buttons cycle through
 *   query       optional search text, appended to the image URL as ?q=
 *   dimensions  [width, height] passed to the viewer as the image size
 *   size        size used for the preview image URL
 *   title       optional title shown in front of " Page <n>"
 *
 * The returned dialog has an update(options) method that merges new
 * options and re-renders title and content.
 */
pandora.ui.pageDialog = function(options, self) {
    self = self || {};
    self.options = Ox.extend({
        // default so that the length checks below never hit undefined
        pages: []
    }, options);

    var dialogHeight = Math.round((window.innerHeight - 48) * 0.9) - 24,
        dialogWidth = Math.round(window.innerWidth * 0.9) - 48,

        $content = Ox.Element(),

        that = Ox.Dialog({
                closeButton: true,
                content: $content,
                focus: false,
                height: dialogHeight,
                maximizeButton: true,
                minHeight: 256,
                minWidth: 512,
                padding: 0,
                removeOnClose: true,
                title: '',
                width: dialogWidth
            })
            .bindEvent({
                close: function() {
                    // singleclick in documentPages checks this reference to
                    // decide between opening a new dialog and updating one
                    delete pandora.$ui.pageDialog;
                },
                resize: function(data) {
                    dialogHeight = data.height;
                    dialogWidth = data.width;
                    $content.options({
                        height: dialogHeight,
                        width: dialogWidth
                    });
                }
            }),

        $infoButton = Ox.Button({
                title: 'info',
                tooltip: Ox._('Open PDF'),
                type: 'image'
            })
            .css({
                position: 'absolute',
                right: '24px',
                top: '4px'
            })
            .bindEvent({
                click: function() {
                    that.close();
                    pandora.URL.push(`/documents/${self.options.document}/${self.options.page}`);
                }
            }),

        $selectButton = Ox.ButtonGroup({
                buttons: [
                    {id: 'previous', title: 'left', tooltip: Ox._('Previous')},
                    {id: 'next', title: 'right', tooltip: Ox._('Next')}
                ],
                type: 'image'
            })
            .css({
                position: 'absolute',
                right: '44px',
                top: '4px'
            })
            [self.options.pages.length > 1 ? 'show' : 'hide']()
            .bindEvent({
                click: function(data) {
                    var pageIdx = self.options.pages.indexOf(self.options.page);
                    if (data.id == 'previous') {
                        pageIdx--;
                    } else {
                        pageIdx++;
                    }
                    // wrap around at both ends of the page list
                    if (pageIdx < 0) {
                        pageIdx = self.options.pages.length - 1;
                    } else if (pageIdx >= self.options.pages.length) {
                        pageIdx = 0;
                    }
                    that.update({
                        page: self.options.pages[pageIdx]
                    });
                }
            });

    $(that.find('.OxBar')[0])
        .append($infoButton)
        .append($selectButton);
    // fixme: why is this needed?
    $(that.find('.OxContent')[0]).css({overflow: 'hidden'});

    setTitle();
    setContent();

    // Replace the current content element with a viewer for the current page.
    function setContent() {
        var url = `/documents/${self.options.document}/1024p${self.options.page}.jpg`;
        if (self.options.query) {
            url += '?q=' + encodeURIComponent(self.options.query);
        }
        $content.replaceWith(
            $content = (
                Ox.ImageViewer({
                    area: [],
                    height: dialogHeight,
                    imageHeight: self.options.dimensions[1],
                    imagePreviewURL: url.replace('/1024p', `/${self.options.size}p`),
                    imageURL: url,
                    imageWidth: self.options.dimensions[0],
                    width: dialogWidth
                })
            )
            .bindEvent({
                key_escape: function() {
                    that.close();
                }
            })
        );
    }

    function setTitle() {
        that.options({
            title: (self.options.title || "") + " Page " + self.options.page
        });
    }

    // Merge new options (e.g. page, pages) and re-render.
    that.update = function(options) {
        self.options = Ox.extend(self.options, options);
        // the page list may have changed, so re-evaluate button visibility
        $selectButton[self.options.pages.length > 1 ? 'show' : 'hide']();
        setTitle();
        setContent();
    };

    return that;

};