render search result highlights as images and show in pages view

This commit is contained in:
j 2023-06-12 14:30:32 +01:00
parent 57d3fc0d32
commit 48e6d4af6f
8 changed files with 273 additions and 12 deletions

View file

@ -122,6 +122,57 @@ class FulltextMixin:
from_ += len(res['hits']['hits']) from_ += len(res['hits']['hits'])
return ids return ids
def highlight_page(self, page, query, size):
    """Render one PDF page as a JPEG with the matches of ``query`` tinted.

    Parameters:
        page: 1-based page number (anything ``int()`` accepts).
        query: text to search for on the page; every matching character
            box is covered with a translucent yellow rectangle. An empty
            query yields the page without highlights.
        size: length in pixels of the longer edge of the returned image.

    Returns:
        The open ``tempfile.NamedTemporaryFile`` the JPEG was saved to.
        The file disappears once that object is closed.
    """
    import pypdfium2 as pdfium
    from PIL import Image
    from PIL import ImageDraw

    pdfpath = self.file.path
    pagenumber = int(page) - 1
    jpg = tempfile.NamedTemporaryFile(suffix='.jpg')
    output = jpg.name
    TINT_COLOR = (255, 255, 0)
    TRANSPARENCY = .45
    OPACITY = int(255 * TRANSPARENCY)
    # Factor passed to page.render() and applied to the character boxes
    # below (presumably 150 dpi over 72 PDF units per inch).
    scale = 150/72

    pdf = pdfium.PdfDocument(pdfpath)
    pdf_page = pdf[pagenumber]
    bitmap = pdf_page.render(scale=scale, rotation=0)
    img = bitmap.to_pil().convert('RGBA')
    # Fully transparent layer; only the rectangles drawn on it are visible.
    overlay = Image.new('RGBA', img.size, TINT_COLOR+(0,))
    draw = ImageDraw.Draw(overlay)

    if query:
        textpage = pdf_page.get_textpage()
        search = textpage.search(query)
        result = search.get_next()
        while result:
            pos, steps = result
            # NOTE(review): one character more than the reported count is
            # highlighted (kept from the original) - confirm this is wanted.
            for index in range(pos, pos + steps + 1):
                box = [b*scale for b in textpage.get_charbox(index)]
                # Box y values are subtracted from the image height, i.e.
                # they are measured from the bottom edge of the page.
                tl = (box[0], img.size[1] - box[3])
                br = (box[2], img.size[1] - box[1])
                draw.rectangle((tl, br), fill=TINT_COLOR+(OPACITY,))
            result = search.get_next()

    img = Image.alpha_composite(img, overlay)
    img = img.convert("RGB")

    # Fit the longer edge to ``size`` and scale the shorter edge with the
    # aspect ratio (width / height), never going below one pixel.
    aspect = img.size[0] / img.size[1]
    if img.size[0] >= img.size[1]:
        width = size
        height = max(1, int(size / aspect))
    else:
        width = max(1, int(size * aspect))
        height = size
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((width, height), Image.LANCZOS)
    img.save(output, quality=72)
    return jpg
class FulltextPageMixin(FulltextMixin): class FulltextPageMixin(FulltextMixin):
_ES_INDEX = "document-page-index" _ES_INDEX = "document-page-index"

View file

@ -54,8 +54,6 @@ def parseCondition(condition, user, item=None, owner=None):
if not op: if not op:
op = '=' op = '='
print(k, op, v)
if op.startswith('!'): if op.startswith('!'):
return buildCondition(k, op[1:], v, user, True, owner=owner) return buildCondition(k, op[1:], v, user, True, owner=owner)
else: else:
@ -136,7 +134,6 @@ def buildCondition(k, op, v, user, exclude=False, owner=None):
q = Q(id=0) q = Q(id=0)
return q return q
elif key_config.get('fulltext'): elif key_config.get('fulltext'):
print('fulltext?')
qs = models.Page.find_fulltext_ids(v) qs = models.Page.find_fulltext_ids(v)
q = Q(id__in=qs) q = Q(id__in=qs)
if exclude: if exclude:

View file

@ -14,6 +14,7 @@ from oxdjango.shortcuts import render_to_json_response, get_object_or_404_json,
from django import forms from django import forms
from django.db.models import Count, Sum from django.db.models import Count, Sum
from django.conf import settings from django.conf import settings
from django.http import HttpResponse
from item import utils from item import utils
from item.models import Item from item.models import Item
@ -381,8 +382,12 @@ def file(request, id, name=None):
def thumbnail(request, id, size=256, page=None):
    """Serve a thumbnail image for a document.

    When a page is given together with a non-empty ``q`` GET parameter,
    the page is rendered with the matches of that query highlighted and
    returned as JPEG; otherwise the regular thumbnail file is served.
    """
    size = int(size)
    document = get_document_or_404_json(request, id)
    # An empty ``?q=`` must not reach the highlighter, fall back instead.
    query = request.GET.get("q")
    if query and page:
        img = document.highlight_page(page, query, size)
        return HttpResponse(img, content_type="image/jpeg")
    return HttpFileResponse(document.thumbnail(size, page=page))
@login_required_json @login_required_json
def upload(request): def upload(request):
if 'id' in request.GET: if 'id' in request.GET:

View file

@ -14,3 +14,5 @@ yt-dlp>=2022.3.8.2
python-memcached python-memcached
elasticsearch<8 elasticsearch<8
future future
pytz
pypdfium2

View file

@ -153,7 +153,8 @@ pandora.ui.collection = function() {
id: data.id, id: data.id,
pages: data.pages, pages: data.pages,
query: ui.findDocuments, query: ui.findDocuments,
ratio: data.ratio ratio: data.ratio,
title: data.title
} }
} }
}; };
@ -164,7 +165,7 @@ pandora.ui.collection = function() {
}), callback); }), callback);
return Ox.clone(data, true); return Ox.clone(data, true);
}, },
keys: ['id', 'pages', 'title', 'ratio', 'modified'], keys: ['pages'].concat(keys),
selected: ui.listSelection, selected: ui.listSelection,
size: 192, size: 192,
sort: ui.collectionSort.concat([ sort: ui.collectionSort.concat([

View file

@ -25,8 +25,12 @@ pandora.ui.documentPages = function(options) {
self.width = self.options.ratio > 1 ? self.size : Math.round(self.size * self.options.ratio); self.width = self.options.ratio > 1 ? self.size : Math.round(self.size * self.options.ratio);
self.height = self.options.ratio > 1 ? Math.round(self.size / self.options.ratio) : self.size; self.height = self.options.ratio > 1 ? Math.round(self.size / self.options.ratio) : self.size;
function renderPage(page) { function renderPage(page, query) {
self.pages.push(page)
var url = `/documents/${self.options.id}/${self.size}p${page}.jpg` var url = `/documents/${self.options.id}/${self.size}p${page}.jpg`
if (query) {
url += '?q=' + encodeURIComponent(query)
}
var $item = Ox.IconItem({ var $item = Ox.IconItem({
imageHeight: self.height, imageHeight: self.height,
imageWidth: self.width, imageWidth: self.width,
@ -45,12 +49,11 @@ pandora.ui.documentPages = function(options) {
that.append($item); that.append($item);
} }
function renderPages(pages) { function renderPages(pages, query) {
console.log('renderPages', pages, self.options.pages) self.pages = []
if (pages) { if (pages) {
console.log('renderPages', pages)
pages.forEach(page => { pages.forEach(page => {
renderPage(page.page) renderPage(page.page, query)
}) })
} else { } else {
if (self.options.pages > 1) { if (self.options.pages > 1) {
@ -78,7 +81,7 @@ pandora.ui.documentPages = function(options) {
range: [0, 100], range: [0, 100],
keys: ['page'] keys: ['page']
}, function(result) { }, function(result) {
renderPages(result.data.items) renderPages(result.data.items, pandora.getFulltextQuery())
}) })
} else { } else {
renderPages() renderPages()
@ -97,7 +100,31 @@ pandora.ui.documentPages = function(options) {
} }
function singleclick(data) {
    // Open the page dialog for the clicked page icon, or point an already
    // open dialog at the clicked page.
    var $item, $target = $(data.target), page;
    // The click may land two levels below the special target element.
    if ($target.parent().parent().is('.OxSpecialTarget')) {
        $target = $target.parent().parent();
    }
    if (!$target.is('.OxSpecialTarget')) {
        return;
    }
    $item = $target.parent().parent();
    page = $item.data('page');
    if (!pandora.$ui.pageDialog) {
        pandora.$ui.pageDialog = pandora.ui.pageDialog({
            document: self.options.id,
            page: page,
            pages: self.pages,
            query: pandora.getFulltextQuery(),
            dimensions: [self.width, self.height],
            title: self.options.title,
            size: self.size
        });
        pandora.$ui.pageDialog.open();
    } else {
        pandora.$ui.pageDialog.update({
            page: page,
            pages: self.pages
        });
    }
}
return that; return that;

167
static/js/pageDialog.js Normal file
View file

@ -0,0 +1,167 @@
'use strict';
pandora.ui.pageDialog = function(options, self) {
    // Dialog that shows one rendered page of a document in an image viewer.
    //
    // Options read by this dialog:
    //   document    document id used to build the image URLs
    //   page        page currently shown
    //   pages       list of pages the previous/next buttons cycle through
    //   query       optional search text, appended as ?q= to the image URLs
    //   dimensions  [width, height] passed to the image viewer
    //   size        size used for the preview image URL
    //   title       optional title prefix
    //
    // Returns the dialog element, extended with an update(options) method.

    self = self || {};
    self.options = Ox.extend({
    }, options);

    var dialogHeight = Math.round((window.innerHeight - 48) * 0.9) - 24,
        dialogWidth = Math.round(window.innerWidth * 0.9) - 48,

        $content = Ox.Element(),

        that = Ox.Dialog({
            closeButton: true,
            content: $content,
            focus: false,
            height: dialogHeight,
            maximizeButton: true,
            minHeight: 256,
            minWidth: 512,
            padding: 0,
            removeOnClose: true,
            title: '',
            width: dialogWidth
        })
        .bindEvent({
            close: function() {
                delete pandora.$ui.pageDialog;
            },
            resize: function(data) {
                dialogHeight = data.height;
                dialogWidth = data.width;
                $content.options({
                    height: dialogHeight,
                    width: dialogWidth
                });
            }
        }),

        $infoButton = Ox.Button({
            title: 'info',
            tooltip: Ox._('Open PDF'),
            type: 'image'
        })
        .css({
            position: 'absolute',
            right: '24px',
            top: '4px'
        })
        .bindEvent({
            click: function(data) {
                that.close();
                pandora.URL.push(`/documents/${self.options.document}/${self.options.page}`);
            }
        }),

        $selectButton = Ox.ButtonGroup({
            buttons: [
                {id: 'previous', title: 'left', tooltip: Ox._('Previous')},
                {id: 'next', title: 'right', tooltip: Ox._('Next')}
            ],
            type: 'image'
        })
        .css({
            position: 'absolute',
            right: '44px',
            top: '4px'
        })
        [self.options.pages.length > 1 ? 'show' : 'hide']()
        .bindEvent({
            click: function(data) {
                // step through self.options.pages, wrapping at both ends
                var pageIdx = self.options.pages.indexOf(self.options.page);
                if (data.id == 'previous') {
                    pageIdx--;
                } else {
                    pageIdx++;
                }
                if (pageIdx < 0) {
                    pageIdx = self.options.pages.length - 1;
                } else if (pageIdx >= self.options.pages.length) {
                    pageIdx = 0;
                }
                that.update({
                    page: self.options.pages[pageIdx]
                });
            }
        });

    $(that.find('.OxBar')[0])
        .append($infoButton)
        .append($selectButton);

    // fixme: why is this needed?
    $(that.find('.OxContent')[0]).css({overflow: 'hidden'});

    setTitle();
    setContent();

    // Replace the dialog content with a viewer for the current page.
    function setContent() {
        var url = `/documents/${self.options.document}/1024p${self.options.page}.jpg`;
        if (self.options.query) {
            url += '?q=' + encodeURIComponent(self.options.query);
        }
        $content.replaceWith(
            $content = (
                Ox.ImageViewer({
                    area: [],
                    height: dialogHeight,
                    imageHeight: self.options.dimensions[1],
                    imagePreviewURL: url.replace('/1024p', `/${self.options.size}p`),
                    imageURL: url,
                    imageWidth: self.options.dimensions[0],
                    width: dialogWidth
                })
            )
            .bindEvent({
                center: function(data) {
                    // storing the viewer position via pandora.UI.set is disabled
                },
                key_escape: function() {
                    pandora.$ui.pageDialog.close();
                },
                page: function(data) {
                    // storing the viewer position via pandora.UI.set is disabled
                },
                zoom: function(data) {
                    // storing the viewer position via pandora.UI.set is disabled
                }
            })
        );
    }

    // Dialog title is the optional document title followed by the page.
    function setTitle() {
        that.options({
            title: (self.options.title || "") + " Page " + self.options.page
        });
    }

    // Merge new options (e.g. page, pages) and redraw title and content.
    that.update = function(options) {
        self.options = Ox.extend(self.options, options);
        // the list of pages may have changed, so refresh the navigation
        $selectButton[self.options.pages.length > 1 ? 'show' : 'hide']();
        setTitle();
        setContent();
    };

    return that;
};

View file

@ -1470,6 +1470,17 @@ pandora.getFindLayer = function() {
return key return key
}; };
pandora.getFulltextQuery = function() {
    // Value of the first 'fulltext' condition of the current document
    // find query; undefined when there is no such condition or no query.
    var find = pandora.user.ui.findDocuments, match;
    if (!find) {
        return;
    }
    match = find.conditions.find(condition => condition.key == 'fulltext');
    if (match) {
        return match.value;
    }
};
pandora.getHash = function(state, callback) { pandora.getHash = function(state, callback) {
// FIXME: remove this // FIXME: remove this
var embedKeys = [ var embedKeys = [