render search result highlights as images and show in pages view

This commit is contained in:
j 2023-06-12 14:30:32 +01:00
commit 48e6d4af6f
8 changed files with 273 additions and 12 deletions

View file

@ -122,6 +122,57 @@ class FulltextMixin:
from_ += len(res['hits']['hits'])
return ids
def highlight_page(self, page, query, size):
    """Render one PDF page as a JPEG with matches of *query* tinted yellow.

    The page is rasterised at 150 dpi, a semi-transparent yellow rectangle
    is drawn over the character boxes of every search hit, and the result
    is scaled so that its longer edge equals *size* pixels.

    Args:
        page: 1-based page number (anything ``int()`` accepts).
        query: text to search for on the page; an empty value yields the
            page without any highlight.
        size: length in pixels of the longer edge of the returned image.

    Returns:
        An open ``tempfile.NamedTemporaryFile`` (suffix ``.jpg``) holding
        the JPEG data.
    """
    import pypdfium2 as pdfium
    from PIL import Image
    from PIL import ImageDraw

    TINT_COLOR = (255, 255, 0)
    TRANSPARENCY = .45
    OPACITY = int(255 * TRANSPARENCY)
    # PDF user space is 72 units per inch; render at 150 dpi.
    scale = 150 / 72

    pdfpath = self.file.path
    pagenumber = int(page) - 1

    pdf = pdfium.PdfDocument(pdfpath)
    pdf_page = pdf[pagenumber]
    bitmap = pdf_page.render(scale=scale, rotation=0)
    img = bitmap.to_pil().convert('RGBA')

    # Highlights go on a fully transparent layer that is composited
    # onto the rendered page afterwards.
    overlay = Image.new('RGBA', img.size, TINT_COLOR + (0,))
    draw = ImageDraw.Draw(overlay)
    fill = TINT_COLOR + (OPACITY,)
    page_height = img.size[1]

    if query:
        textpage = pdf_page.get_textpage()
        search = textpage.search(query)
        result = search.get_next()
        while result:
            pos, steps = result
            # NOTE(review): the original code highlights one character
            # more than the reported match length; kept as is - confirm
            # whether this is intentional.
            steps += 1
            for index in range(pos, pos + steps):
                box = [b * scale for b in textpage.get_charbox(index)]
                # PDF coordinates have their origin at the bottom left,
                # image coordinates at the top left: flip the y axis.
                tl = (box[0], page_height - box[3])
                br = (box[2], page_height - box[1])
                draw.rectangle((tl, br), fill=fill)
            result = search.get_next()

    img = Image.alpha_composite(img, overlay)
    img = img.convert("RGB")

    # Fit the image into a size x size square, keeping the aspect ratio.
    aspect = img.size[0] / img.size[1]
    if img.size[0] >= img.size[1]:
        width = size
        height = int(size / aspect)
    else:
        # aspect is width / height, so the shorter edge is size * aspect
        width = int(size * aspect)
        height = size
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((max(width, 1), max(height, 1)), Image.LANCZOS)

    # Create the temporary file only once rendering succeeded so that a
    # failure above does not leave an open file behind.
    jpg = tempfile.NamedTemporaryFile(suffix='.jpg')
    img.save(jpg.name, quality=72)
    return jpg
class FulltextPageMixin(FulltextMixin):
_ES_INDEX = "document-page-index"

View file

@ -54,8 +54,6 @@ def parseCondition(condition, user, item=None, owner=None):
if not op:
op = '='
print(k, op, v)
if op.startswith('!'):
return buildCondition(k, op[1:], v, user, True, owner=owner)
else:
@ -136,7 +134,6 @@ def buildCondition(k, op, v, user, exclude=False, owner=None):
q = Q(id=0)
return q
elif key_config.get('fulltext'):
print('fulltext?')
qs = models.Page.find_fulltext_ids(v)
q = Q(id__in=qs)
if exclude:

View file

@ -14,6 +14,7 @@ from oxdjango.shortcuts import render_to_json_response, get_object_or_404_json,
from django import forms
from django.db.models import Count, Sum
from django.conf import settings
from django.http import HttpResponse
from item import utils
from item.models import Item
@ -381,8 +382,12 @@ def file(request, id, name=None):
def thumbnail(request, id, size=256, page=None):
    """Serve a thumbnail of a document, optionally with search highlights.

    When the request carries a non-empty ``q`` parameter and *page* is
    given, the page is rendered with the matches of ``q`` highlighted and
    returned as ``image/jpeg``. In every other case the regular
    thumbnail file of the document is served.

    Args:
        request: the incoming request.
        id: id of the document, resolved via ``get_document_or_404_json``.
        size: requested thumbnail size; converted with ``int()``.
        page: optional page number inside the document.
    """
    size = int(size)
    document = get_document_or_404_json(request, id)
    # An empty ?q= has nothing to highlight, so it is treated like a
    # missing query instead of being passed on to the highlight renderer.
    query = request.GET.get("q")
    if query and page:
        img = document.highlight_page(page, query, size)
        return HttpResponse(img, content_type="image/jpeg")
    return HttpFileResponse(document.thumbnail(size, page=page))
@login_required_json
def upload(request):
if 'id' in request.GET: