render search result highlights as images and show in pages view

2023-06-12 14:30:32 +01:00 · 2023-06-12 14:30:32 +01:00 · 48e6d4af6f
commit 48e6d4af6f
parent 57d3fc0d32
8 changed files with 273 additions and 12 deletions
--- a/pandora/document/fulltext.py
+++ b/pandora/document/fulltext.py
@ -122,6 +122,57 @@ class FulltextMixin:
            from_ += len(res['hits']['hits'])
        return ids

+    def highlight_page(self, page, query, size):
+        """Render one PDF page as a JPEG with every match of ``query`` tinted.
+
+        The page of ``self.file`` is rasterised, a translucent yellow
+        rectangle is drawn over the character boxes of each search hit, and
+        the image is scaled so that its longer side is ``size`` pixels.
+
+        page: 1-based page number (anything ``int()`` accepts).
+        query: text to search for on the page.
+        size: length in pixels of the longer side of the returned image.
+
+        Returns the open ``tempfile.NamedTemporaryFile`` containing the JPEG;
+        the caller is responsible for closing it.
+        """
+        import pypdfium2 as pdfium
+        from PIL import Image
+        from PIL import ImageDraw
+
+        pdfpath = self.file.path
+        pagenumber = int(page) - 1  # pdfium pages are indexed from 0
+        TINT_COLOR = (255, 255, 0)
+        TRANSPARENCY = .45
+        OPACITY = int(255 * TRANSPARENCY)
+        scale = 150/72
+
+        pdf = pdfium.PdfDocument(pdfpath)
+        page = pdf[pagenumber]
+
+        bitmap = page.render(scale=scale, rotation=0)
+        img = bitmap.to_pil().convert('RGBA')
+        # Fully transparent layer; only the highlight boxes become visible.
+        overlay = Image.new('RGBA', img.size, TINT_COLOR+(0,))
+        draw = ImageDraw.Draw(overlay)
+
+        textpage = page.get_textpage()
+        search = textpage.search(query)
+        result = search.get_next()
+        while result:
+            pos, steps = result
+            # NOTE(review): one more box than the reported length is drawn
+            # for every hit - confirm this is intentional.
+            steps += 1
+            while steps:
+                box = textpage.get_charbox(pos)
+                box = [b*scale for b in box]
+                # The vertical axis is flipped against the image height:
+                # box values are measured from the bottom, pixels from the top.
+                tl = (box[0], img.size[1] - box[3])
+                br = (box[2], img.size[1] - box[1])
+                draw.rectangle((tl, br), fill=TINT_COLOR+(OPACITY,))
+                pos += 1
+                steps -= 1
+            result = search.get_next()
+        img = Image.alpha_composite(img, overlay)
+        img = img.convert("RGB")
+        aspect = img.size[0] / img.size[1]
+        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
+        # and is available in old and new releases alike.
+        resize_method = Image.LANCZOS
+        if img.size[0] >= img.size[1]:
+            width = size
+            height = max(1, int(size / aspect))
+        else:
+            # Portrait: the height is the longer side, so the width must
+            # shrink by the aspect ratio (multiplying, not dividing).
+            width = max(1, int(size * aspect))
+            height = size
+        img = img.resize((width, height), resize_method)
+        # Create the temp file only once rendering succeeded, so a failure
+        # above does not leave a stray open file behind.
+        jpg = tempfile.NamedTemporaryFile(suffix='.jpg')
+        output = jpg.name
+        img.save(output, quality=72)
+        return jpg
+

 class FulltextPageMixin(FulltextMixin):
    _ES_INDEX = "document-page-index"
--- a/pandora/document/managers/pages.py
+++ b/pandora/document/managers/pages.py
@ -54,8 +54,6 @@ def parseCondition(condition, user, item=None, owner=None):
    if not op:
        op = '='

-    print(k, op, v)
-
    if op.startswith('!'):
        return buildCondition(k, op[1:], v, user, True, owner=owner)
    else:
@ -136,7 +134,6 @@ def buildCondition(k, op, v, user, exclude=False, owner=None):
                q = Q(id=0)
        return q
    elif key_config.get('fulltext'):
-        print('fulltext?')
        qs = models.Page.find_fulltext_ids(v)
        q = Q(id__in=qs)
        if exclude:
--- a/pandora/document/views.py
+++ b/pandora/document/views.py
@ -14,6 +14,7 @@ from oxdjango.shortcuts import render_to_json_response, get_object_or_404_json,
 from django import forms
 from django.db.models import Count, Sum
 from django.conf import settings
+from django.http import HttpResponse

 from item import utils
 from item.models import Item
@ -381,8 +382,12 @@ def file(request, id, name=None):
 def thumbnail(request, id, size=256, page=None):
    size = int(size)
    document = get_document_or_404_json(request, id)
+    if "q" in request.GET and page:  # a search query plus a page number: serve that page with the matches highlighted
+        img = document.highlight_page(page, request.GET["q"], size)  # NOTE(review): "q" may be an empty string here - confirm highlight_page copes with that
+        return HttpResponse(img, content_type="image/jpeg")  # img is presumably the temporary JPEG file returned by highlight_page
    return HttpFileResponse(document.thumbnail(size, page=page))

+
@login_required_json
 def upload(request):
    if 'id' in request.GET: