render search result highlights as images and show in pages view

This commit is contained in:
j 2023-06-12 14:30:32 +01:00
parent 57d3fc0d32
commit 48e6d4af6f
8 changed files with 273 additions and 12 deletions

View File

@ -122,6 +122,57 @@ class FulltextMixin:
from_ += len(res['hits']['hits'])
return ids
def highlight_page(self, page, query, size):
    """Render one PDF page as a JPEG with all matches of *query* highlighted.

    The page of ``self.file`` is rasterised, a translucent yellow box is
    drawn over every character of every search hit, and the result is
    scaled so that its longer side is *size* pixels.

    page  -- 1-based page number (anything ``int()`` accepts)
    query -- text to search for on the page
    size  -- length in pixels of the longer side of the returned image

    Returns the open ``tempfile.NamedTemporaryFile`` that holds the JPEG.
    """
    import pypdfium2 as pdfium
    from PIL import Image
    from PIL import ImageDraw

    TINT_COLOR = (255, 255, 0)
    TRANSPARENCY = .45
    OPACITY = int(255 * TRANSPARENCY)
    # Render at 150/72 of the page's native size.
    scale = 150 / 72

    jpg = tempfile.NamedTemporaryFile(suffix='.jpg')
    pdf = pdfium.PdfDocument(self.file.path)
    pdf_page = pdf[int(page) - 1]
    bitmap = pdf_page.render(scale=scale, rotation=0)
    img = bitmap.to_pil().convert('RGBA')

    # Highlights go on a fully transparent layer that is composited onto
    # the page afterwards, so the page stays readable underneath.
    overlay = Image.new('RGBA', img.size, TINT_COLOR + (0,))
    draw = ImageDraw.Draw(overlay)
    fill = TINT_COLOR + (OPACITY,)
    page_height = img.size[1]

    textpage = pdf_page.get_textpage()
    search = textpage.search(query)
    result = search.get_next()
    while result:
        first, count = result
        # NOTE(review): one character more than the reported match length
        # is highlighted, exactly as before - confirm this is intended.
        for pos in range(first, first + count + 1):
            box = [b * scale for b in textpage.get_charbox(pos)]
            # Char boxes are flipped vertically to image coordinates.
            tl = (box[0], page_height - box[3])
            br = (box[2], page_height - box[1])
            draw.rectangle((tl, br), fill=fill)
        result = search.get_next()

    img = Image.alpha_composite(img, overlay).convert("RGB")

    # Fit the longer side to `size` and keep the aspect ratio.
    aspect = img.size[0] / img.size[1]
    if img.size[0] >= img.size[1]:
        width = size
        height = max(1, int(size / aspect))
    else:
        # aspect < 1 here, so the width must shrink: size * aspect.
        width = max(1, int(size * aspect))
        height = size
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((width, height), Image.LANCZOS)
    img.save(jpg.name, quality=72)
    return jpg
class FulltextPageMixin(FulltextMixin):
_ES_INDEX = "document-page-index"

View File

@ -54,8 +54,6 @@ def parseCondition(condition, user, item=None, owner=None):
if not op:
op = '='
print(k, op, v)
if op.startswith('!'):
return buildCondition(k, op[1:], v, user, True, owner=owner)
else:
@ -136,7 +134,6 @@ def buildCondition(k, op, v, user, exclude=False, owner=None):
q = Q(id=0)
return q
elif key_config.get('fulltext'):
print('fulltext?')
qs = models.Page.find_fulltext_ids(v)
q = Q(id__in=qs)
if exclude:

View File

@ -14,6 +14,7 @@ from oxdjango.shortcuts import render_to_json_response, get_object_or_404_json,
from django import forms
from django.db.models import Count, Sum
from django.conf import settings
from django.http import HttpResponse
from item import utils
from item.models import Item
@ -381,8 +382,12 @@ def file(request, id, name=None):
def thumbnail(request, id, size=256, page=None):
    """Serve a thumbnail of a document or of one of its pages.

    If the request has a non-empty ``q`` parameter and *page* is given, the
    page is rendered with the matches of ``q`` highlighted and returned as
    a JPEG response; otherwise the regular thumbnail file is served.
    """
    size = int(size)
    document = get_document_or_404_json(request, id)
    # An empty "?q=" must not reach the text search; fall through to the
    # plain thumbnail instead of failing the request.
    query = request.GET.get("q")
    if query and page:
        img = document.highlight_page(page, query, size)
        return HttpResponse(img, content_type="image/jpeg")
    return HttpFileResponse(document.thumbnail(size, page=page))
@login_required_json
def upload(request):
if 'id' in request.GET:

View File

@ -14,3 +14,5 @@ yt-dlp>=2022.3.8.2
python-memcached
elasticsearch<8
future
pytz
pypdfium2

View File

@ -153,7 +153,8 @@ pandora.ui.collection = function() {
id: data.id,
pages: data.pages,
query: ui.findDocuments,
ratio: data.ratio
ratio: data.ratio,
title: data.title
}
}
};
@ -164,7 +165,7 @@ pandora.ui.collection = function() {
}), callback);
return Ox.clone(data, true);
},
keys: ['id', 'pages', 'title', 'ratio', 'modified'],
keys: ['pages'].concat(keys),
selected: ui.listSelection,
size: 192,
sort: ui.collectionSort.concat([

View File

@ -25,8 +25,12 @@ pandora.ui.documentPages = function(options) {
self.width = self.options.ratio > 1 ? self.size : Math.round(self.size * self.options.ratio);
self.height = self.options.ratio > 1 ? Math.round(self.size / self.options.ratio) : self.size;
function renderPage(page) {
function renderPage(page, query) {
self.pages.push(page)
var url = `/documents/${self.options.id}/${self.size}p${page}.jpg`
if (query) {
url += '?q=' + encodeURIComponent(query)
}
var $item = Ox.IconItem({
imageHeight: self.height,
imageWidth: self.width,
@ -45,12 +49,11 @@ pandora.ui.documentPages = function(options) {
that.append($item);
}
function renderPages(pages) {
console.log('renderPages', pages, self.options.pages)
function renderPages(pages, query) {
self.pages = []
if (pages) {
console.log('renderPages', pages)
pages.forEach(page => {
renderPage(page.page)
renderPage(page.page, query)
})
} else {
if (self.options.pages > 1) {
@ -78,7 +81,7 @@ pandora.ui.documentPages = function(options) {
range: [0, 100],
keys: ['page']
}, function(result) {
renderPages(result.data.items)
renderPages(result.data.items, pandora.getFulltextQuery())
})
} else {
renderPages()
@ -97,7 +100,31 @@ pandora.ui.documentPages = function(options) {
}
function singleclick(data) {
    // Opens the page dialog for the clicked page icon, or updates the
    // dialog if one is already open.
    var $item, $target = $(data.target), page, query;
    if ($target.parent().parent().is('.OxSpecialTarget')) {
        $target = $target.parent().parent();
    }
    if (!$target.is('.OxSpecialTarget')) {
        return;
    }
    $item = $target.parent().parent();
    page = $item.data('page');
    query = pandora.getFulltextQuery();
    if (!pandora.$ui.pageDialog) {
        pandora.$ui.pageDialog = pandora.ui.pageDialog({
            document: self.options.id,
            page: page,
            pages: self.pages,
            query: query,
            dimensions: [self.width, self.height],
            title: self.options.title,
            size: self.size
        });
        pandora.$ui.pageDialog.open();
    } else {
        // Pass the query too, so the highlights in an already open
        // dialog follow the current search.
        pandora.$ui.pageDialog.update({
            page: page,
            pages: self.pages,
            query: query
        });
    }
}
return that;

167
static/js/pageDialog.js Normal file
View File

@ -0,0 +1,167 @@
'use strict';
// Dialog that shows a single rendered page of a document in an image viewer.
//
// options:
//   document    id of the document
//   page        page currently shown
//   pages       list of pages the previous/next buttons cycle through
//   query       optional fulltext query; if set it is appended to the image
//               URLs as "?q=..."
//   dimensions  [width, height] passed to the viewer as image dimensions
//   size        size used in the preview image URL
//   title       optional document title used in the dialog title
//
// The returned dialog has an additional update(options) method that merges
// new options and re-renders title and content.
pandora.ui.pageDialog = function(options, self) {

    self = self || {};
    self.options = Ox.extend({}, options);

    var dialogHeight = Math.round((window.innerHeight - 48) * 0.9) - 24,
        dialogWidth = Math.round(window.innerWidth * 0.9) - 48,

        $content = Ox.Element(),

        that = Ox.Dialog({
                closeButton: true,
                content: $content,
                focus: false,
                height: dialogHeight,
                maximizeButton: true,
                minHeight: 256,
                minWidth: 512,
                padding: 0,
                removeOnClose: true,
                title: '',
                width: dialogWidth
            })
            .bindEvent({
                close: function() {
                    delete pandora.$ui.pageDialog;
                },
                resize: function(data) {
                    dialogHeight = data.height;
                    dialogWidth = data.width;
                    $content.options({
                        height: dialogHeight,
                        width: dialogWidth
                    });
                }
            }),

        // Closes the dialog and navigates to the document at the current page.
        $infoButton = Ox.Button({
                title: 'info',
                tooltip: Ox._('Open PDF'),
                type: 'image'
            })
            .css({
                position: 'absolute',
                right: '24px',
                top: '4px'
            })
            .bindEvent({
                click: function() {
                    that.close();
                    pandora.URL.push(`/documents/${self.options.document}/${self.options.page}`);
                }
            }),

        // Previous/next buttons, wrapping around at both ends of the list.
        $selectButton = Ox.ButtonGroup({
                buttons: [
                    {id: 'previous', title: 'left', tooltip: Ox._('Previous')},
                    {id: 'next', title: 'right', tooltip: Ox._('Next')}
                ],
                type: 'image'
            })
            .css({
                position: 'absolute',
                right: '44px',
                top: '4px'
            })
            .bindEvent({
                click: function(data) {
                    var pages = self.options.pages,
                        pageIdx = pages.indexOf(self.options.page);
                    pageIdx += data.id == 'previous' ? -1 : 1;
                    if (pageIdx < 0) {
                        pageIdx = pages.length - 1;
                    } else if (pageIdx >= pages.length) {
                        pageIdx = 0;
                    }
                    that.update({
                        page: pages[pageIdx]
                    });
                }
            });

    $(that.find('.OxBar')[0])
        .append($infoButton)
        .append($selectButton);
    // fixme: why is this needed?
    $(that.find('.OxContent')[0]).css({overflow: 'hidden'});

    updateSelectButton();
    setTitle();
    setContent();

    function setContent() {
        var url = `/documents/${self.options.document}/1024p${self.options.page}.jpg`;
        if (self.options.query) {
            url += '?q=' + encodeURIComponent(self.options.query);
        }
        // Replace the current content element with a fresh viewer.
        // TODO: viewer position/zoom is not persisted yet.
        $content.replaceWith(
            $content = Ox.ImageViewer({
                    area: [],
                    height: dialogHeight,
                    imageHeight: self.options.dimensions[1],
                    // same image at the smaller size given in options.size
                    imagePreviewURL: url.replace('/1024p', `/${self.options.size}p`),
                    imageURL: url,
                    imageWidth: self.options.dimensions[0],
                    width: dialogWidth
                })
                .bindEvent({
                    key_escape: function() {
                        that.close();
                    }
                })
        );
    }

    function setTitle() {
        that.options({
            title: (self.options.title || "") + " Page " + self.options.page
        });
    }

    function updateSelectButton() {
        // Paging buttons only make sense with more than one page.
        $selectButton[self.options.pages.length > 1 ? 'show' : 'hide']();
    }

    that.update = function(options) {
        self.options = Ox.extend(self.options, options);
        updateSelectButton();
        setTitle();
        setContent();
    };

    return that;

};

View File

@ -1470,6 +1470,17 @@ pandora.getFindLayer = function() {
return key
};
// Returns the value of the first top-level 'fulltext' condition of the
// current document find state, or undefined if there is none.
pandora.getFulltextQuery = function() {
    var find = pandora.user.ui.findDocuments,
        matches;
    if (!find) {
        return;
    }
    matches = find.conditions.filter(function(condition) {
        return condition.key == 'fulltext';
    });
    return matches.length ? matches[0].value : undefined;
};
pandora.getHash = function(state, callback) {
// FIXME: remove this
var embedKeys = [