diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py index b162c70c..91ade121 100644 --- a/pandora/document/fulltext.py +++ b/pandora/document/fulltext.py @@ -59,6 +59,12 @@ class FulltextMixin: return extract_text(self.file.path) elif self.extension == 'epub': return epub.extract_text(self.file.path) + elif self.extension == 'txt': + data = '' + if os.path.exists(self.file.path): + with open(self.file.path) as fd: + data = fd.read() + return data elif self.extension in IMAGE_EXTENSIONS: return ocr_image(self.file.path) elif self.extension in CONVERT_EXTENSIONS: @@ -191,6 +197,12 @@ class FulltextPageMixin(FulltextMixin): elif self.extension == 'epub': # FIXME: is there a nice way to split that into pages return epub.extract_text(self.file.path) + elif self.extension == 'txt': + data = '' + if os.path.exists(self.file.path): + with open(self.file.path) as fd: + data = fd.read() + return data elif self.extension in IMAGE_EXTENSIONS: return ocr_image(self.document.file.path) elif self.extension == 'html': diff --git a/pandora/document/models.py b/pandora/document/models.py index dcca304a..0795b765 100644 --- a/pandora/document/models.py +++ b/pandora/document/models.py @@ -31,6 +31,7 @@ from . import managers from . import utils from . import tasks from . import epub +from . 
import txt from .fulltext import FulltextMixin, FulltextPageMixin User = get_user_model() @@ -178,6 +179,9 @@ class Document(models.Model, FulltextMixin): elif self.extension == 'epub': prefix = 3 value = self.pages + elif self.extension == 'txt': + prefix = 4 + value = self.pages elif self.extension == 'html': prefix = 1 value = self.dimensions @@ -393,7 +397,7 @@ class Document(models.Model, FulltextMixin): @property def dimensions(self): - if self.extension in ('pdf', 'epub'): + if self.extension in ('pdf', 'epub', 'txt'): return self.pages elif self.extension == 'html': return len(self.data.get('text', '').split(' ')) @@ -574,6 +578,10 @@ class Document(models.Model, FulltextMixin): if data: with open(path, "wb") as fd: fd.write(data) + elif self.extension == 'txt': + path = os.path.join(folder, '1024.jpg') + if os.path.exists(src) and not os.path.exists(path): + txt.render(src, path) elif self.extension in ('jpg', 'png', 'gif', 'webp', 'heic', 'heif', 'cr2'): if os.path.exists(src): if size and page: @@ -622,19 +630,22 @@ class Document(models.Model, FulltextMixin): if thumb: self.width, self.height = open_image_rgb(thumb).size self.pages = 1 + elif self.extension == 'txt': + thumb = self.thumbnail(1024) + if thumb: + self.width, self.height = open_image_rgb(thumb).size + self.pages = 1 elif self.width == -1: self.pages = -1 self.width, self.height = open_image_rgb(self.file.path).size def get_ratio(self): - if self.extension in ('pdf', 'epub'): + if self.extension in ('pdf', 'epub', 'txt'): image = self.thumbnail(1024) try: size = Image.open(image).size except: size = [1, 1] - elif self.extension == 'epub': - size = [1, 1] else: if self.width > 0: size = self.resolution diff --git a/pandora/document/txt.py b/pandora/document/txt.py new file mode 100755 index 00000000..6189b9ac --- /dev/null +++ b/pandora/document/txt.py @@ -0,0 +1,71 @@ +import os + +from PIL import Image +from argparse import ArgumentParser +from ox.image import drawText, wrapText + +from 
def decode_line(line):
    """Decode one raw line of a text file to ``str``.

    UTF-8 is tried first. If the bytes are not valid UTF-8 they are decoded
    as latin-1, which maps every possible byte value to a character and
    therefore cannot fail, so no further fallback is needed.

    :param line: raw line as ``bytes``
    :returns: the decoded ``str``
    """
    try:
        return line.decode('utf-8')
    except UnicodeDecodeError:
        return line.decode('latin-1')


def _text_segments(fd):
    """Yield decoded, stripped text segments from the binary file ``fd``.

    Each line is split on bare carriage returns so that text using ``\\r``
    as line separator still produces one segment per visual line.
    """
    for raw_line in fd:
        yield from decode_line(raw_line).strip().split('\r')


def render(infile, outfile):
    """Render the beginning of a plain-text file as a page preview image.

    A 768x1024 greyscale image with a white background is created and the
    text is drawn in black inside a 64px margin, wrapped to the page width,
    until the page is full or the file ends. The result is saved to
    ``outfile`` with ``quality=50``.

    :param infile: path of the text file to read (opened in binary mode)
    :param outfile: path of the image to write
    """
    # Imported here so that the text helpers above stay importable
    # without Django being set up.
    from django.conf import settings

    image_size = (768, 1024)
    margin = 64
    font_file = settings.TXT_TTF
    font_size = 24
    line_height = 32
    text_width = image_size[0] - 2 * margin
    # Floor division: this is a line *count*. True division would give a
    # float and, for constants that do not divide evenly, the budget would
    # never hit exactly zero and text would run off the page.
    remaining = (image_size[1] - 2 * margin) // line_height

    image = Image.new('L', image_size, 255)
    offset = margin

    with open(infile, 'rb') as fd:
        for segment in _text_segments(fd):
            wrapped = wrapText(
                segment,
                text_width,
                # we don't want the last line that ends with an ellipsis,
                # so ask for one more line than will actually be drawn
                remaining + 1,
                font_file,
                font_size
            )
            for text in wrapped:
                drawText(image, (margin, offset), text, font_file, font_size, 0)
                offset += line_height
                remaining -= 1
                if remaining <= 0:
                    break
            if remaining <= 0:
                break

    image.save(outfile, quality=50)
private variable + ([options[, self]]) -> TXT Viewer + center Center changed + center <[n]|s> Center + zoom Zoom changed + zoom Zoom + page Page changed + page Page +@*/ +Ox.TXTViewer = function(options, self) { + + self = self || {}; + var that = Ox.Element({}, self) + .defaults({ + center: 'auto', + height: 384, + page: 1, + maxZoom: 16, + url: '', + width: 512, + zoom: 'fit' + }) + .options(options || {}) + .update({ + center: function() { + setCenterAndZoom(); + }, + page: updatePage, + // allow for setting height and width at the same time + height: updateSize, + url: function() { + self.$iframe.postMessage('txt', {txt: self.options.url}); + }, + width: updateSize, + zoom: function() { + setCenterAndZoom(); + } + }) + .addClass('OxTXTViewer') + .on({ + }) + .bindEvent({ + }); + + self.$iframe = Ox.Element('