From 771cf460b435cd9776ed4671cc898118740e8aa6 Mon Sep 17 00:00:00 2001 From: j Date: Sun, 6 Jul 2025 21:40:31 +0100 Subject: [PATCH] minimal support for txt documents --- pandora/document/fulltext.py | 12 + pandora/document/models.py | 19 +- pandora/document/txt.py | 71 +++++ pandora/settings.py | 3 + static/js/document.js | 10 + static/js/documentDialog.js | 20 ++ static/js/utils.js | 2 +- static/txt.js/index.html | 13 + static/txt.js/txt.js | 516 +++++++++++++++++++++++++++++++++++ 9 files changed, 661 insertions(+), 5 deletions(-) create mode 100755 pandora/document/txt.py create mode 100644 static/txt.js/index.html create mode 100644 static/txt.js/txt.js diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py index b162c70c..91ade121 100644 --- a/pandora/document/fulltext.py +++ b/pandora/document/fulltext.py @@ -59,6 +59,12 @@ class FulltextMixin: return extract_text(self.file.path) elif self.extension == 'epub': return epub.extract_text(self.file.path) + elif self.extension == 'txt': + data = '' + if os.path.exists(self.file.path): + with open(self.file.path) as fd: + data = fd.read() + return data elif self.extension in IMAGE_EXTENSIONS: return ocr_image(self.file.path) elif self.extension in CONVERT_EXTENSIONS: @@ -191,6 +197,12 @@ class FulltextPageMixin(FulltextMixin): elif self.extension == 'epub': # FIXME: is there a nice way to split that into pages return epub.extract_text(self.file.path) + elif self.extension == 'txt': + data = '' + if os.path.exists(self.file.path): + with open(self.file.path) as fd: + data = fd.read() + return data elif self.extension in IMAGE_EXTENSIONS: return ocr_image(self.document.file.path) elif self.extension == 'html': diff --git a/pandora/document/models.py b/pandora/document/models.py index dcca304a..0795b765 100644 --- a/pandora/document/models.py +++ b/pandora/document/models.py @@ -31,6 +31,7 @@ from . import managers from . import utils from . import tasks from . import epub +from . import txt from .fulltext import FulltextMixin, FulltextPageMixin User = get_user_model() @@ -178,6 +179,9 @@ class Document(models.Model, FulltextMixin): elif self.extension == 'epub': prefix = 3 value = self.pages + elif self.extension == 'txt': + prefix = 4 + value = self.pages elif self.extension == 'html': prefix = 1 value = self.dimensions @@ -393,7 +397,7 @@ class Document(models.Model, FulltextMixin): @property def dimensions(self): - if self.extension in ('pdf', 'epub'): + if self.extension in ('pdf', 'epub', 'txt'): return self.pages elif self.extension == 'html': return len(self.data.get('text', '').split(' ')) @@ -574,6 +578,10 @@ class Document(models.Model, FulltextMixin): if data: with open(path, "wb") as fd: fd.write(data) + elif self.extension == 'txt': + path = os.path.join(folder, '1024.jpg') + if os.path.exists(src) and not os.path.exists(path): + txt.render(src, path) elif self.extension in ('jpg', 'png', 'gif', 'webp', 'heic', 'heif', 'cr2'): if os.path.exists(src): if size and page: @@ -622,19 +630,22 @@ class Document(models.Model, FulltextMixin): if thumb: self.width, self.height = open_image_rgb(thumb).size self.pages = 1 + elif self.extension == 'txt': + thumb = self.thumbnail(1024) + if thumb: + self.width, self.height = open_image_rgb(thumb).size + self.pages = 1 elif self.width == -1: self.pages = -1 self.width, self.height = open_image_rgb(self.file.path).size def get_ratio(self): - if self.extension in ('pdf', 'epub'): + if self.extension in ('pdf', 'epub', 'txt'): image = self.thumbnail(1024) try: size = Image.open(image).size except: size = [1, 1] - elif self.extension == 'epub': - size = [1, 1] else: if self.width > 0: size = self.resolution diff --git a/pandora/document/txt.py b/pandora/document/txt.py new file mode 100755 index 00000000..6189b9ac --- /dev/null +++ b/pandora/document/txt.py @@ -0,0 +1,71 @@ +import os + +from PIL import Image +from argparse import ArgumentParser +from ox.image import drawText, wrapText + +from django.conf import settings + + +def decode_line(line): + try: + line = line.decode('utf-8') + except: + try: + line = line.decode('latin-1') + except: + line = line.decode('utf-8', errors='replace') + return line + +def render(infile, outfile): + + with open(infile, 'rb') as f: + + image_size = (768, 1024) + margin = 64 + offset = margin + font_file = settings.TXT_TTF + font_size = 24 + line_height = 32 + max_lines = (image_size[1] - 2 * margin) / line_height + + image = Image.new('L', image_size, (255)) + + for line in f: + line = decode_line(line) + + for line_ in line.strip().split('\r'): + + lines = wrapText( + line_, + image_size[0] - 2 * margin, + # we don't want the last line that ends with an ellipsis + max_lines + 1, + font_file, + font_size + ) + + for line__ in lines: + drawText( + image, + (margin, offset), + line__, + font_file, + font_size, + (0) + ) + offset += line_height + max_lines -= 1 + + if max_lines == 0: + break + + if max_lines == 0: + break + + if max_lines == 0: + break + + image.save(outfile, quality=50) + + diff --git a/pandora/settings.py b/pandora/settings.py index 0aa3d768..7c9a890a 100644 --- a/pandora/settings.py +++ b/pandora/settings.py @@ -313,6 +313,9 @@ EMPTY_CLIPS = True YT_DLP_EXTRA = [] +TXT_TTF = "/usr/share/fonts/truetype/msttcorefonts/Georgia.ttf" +TXT_TTF = "/usr/share/fonts/truetype/noto/NotoSansMono-Regular.ttf" + #you can ignore things below this line #========================================================================= LOCAL_APPS = [] diff --git a/static/js/document.js b/static/js/document.js index 702b9e45..324207a5 100644 --- a/static/js/document.js +++ b/static/js/document.js @@ -87,6 +87,16 @@ pandora.ui.document = function() { width: that.width(), zoom: 'fit' }) + : item.extension == 'txt' + ? Ox.TXTViewer({ + height: that.height() - 16, + page: pandora.user.ui.documents[item.id] + ? pandora.user.ui.documents[item.id].position + : 1, + url: '/documents/' + item.id + '/' + pandora.safeDocumentName(item.title) + '.' + item.extension + '?' + item.modified, + width: that.width(), + zoom: 'fit' + }) : item.extension == 'html' ? pandora.$ui.textPanel = pandora.ui.textPanel(item, $toolbar) : Ox.ImageViewer({ diff --git a/static/js/documentDialog.js b/static/js/documentDialog.js index f993be94..844895b4 100644 --- a/static/js/documentDialog.js +++ b/static/js/documentDialog.js @@ -200,6 +200,26 @@ pandora.ui.documentDialog = function(options) { width: dialogWidth, zoom: 'fit' }) + : item.extension == 'epub' + ? Ox.EpubViewer({ + height: dialogHeight, + page: pandora.user.ui.documents[item.id] + ? pandora.user.ui.documents[item.id].position + : 1, + url: '/documents/' + item.id + '/epub/', + width: dialogWidth, + zoom: 'fit' + }) + : item.extension == 'txt' + ? Ox.TXTViewer({ + height: dialogHeight, + page: pandora.user.ui.documents[item.id] + ? pandora.user.ui.documents[item.id].position + : 1, + url: '/documents/' + item.id + '/' + pandora.safeDocumentName(item.title) + '.' + item.extension + '?' + item.modified, + width: dialogWidth, + zoom: 'fit' + }) : item.extension == 'html' ? pandora.$ui.textPanel = pandora.ui.textPanel(item) : Ox.ImageViewer({ diff --git a/static/js/utils.js b/static/js/utils.js index eb010ad1..52f93d32 100644 --- a/static/js/utils.js +++ b/static/js/utils.js @@ -434,7 +434,7 @@ pandora.imageExtensions = [ ]; pandora.documentExtensions = [ - 'pdf', 'epub' /* , 'txt', */ + 'pdf', 'epub', 'txt' ].concat(pandora.imageExtensions); pandora.uploadDroppedFiles = function(files) { diff --git a/static/txt.js/index.html b/static/txt.js/index.html new file mode 100644 index 00000000..559835ca --- /dev/null +++ b/static/txt.js/index.html @@ -0,0 +1,13 @@ + + + + + + + + + + diff --git a/static/txt.js/txt.js b/static/txt.js/txt.js new file mode 100644 index 00000000..b4cc1e65 --- /dev/null +++ b/static/txt.js/txt.js @@ -0,0 +1,516 @@ +let txtjs = {} + +Ox.load({UI: {loadCSS: false}}, function() { + Ox.$parent.bindMessage(function(data, event) { + txtjs.onMessage(data, event) + }) +}) + +txtjs.open = function(url) { + fetch(url).then(function(response) { + return response.text() + }).then(txtjs.renderText) +} + +txtjs.mark = function(notes) { + notes.forEach(function(note) { + if (!txtjs.notes.includes(note)) { + txtjs.notes.push(note) + } + txtjs.renderNote(note) + }) +} + +txtjs.notes = [] + +txtjs.addNoteFromSelection = function() { + let note = txtjs.getNoteFromSelection() + if (!note || txtjs.noteExists(note)) { + return + } + txtjs.renderNote(note) + txtjs.notes.push(note) + txtjs.selectNote(note.id) + getSelection().removeAllRanges() + txtjs.postMessage('addNote', note) +} + +txtjs.beginEdit = function() { + let selected = txtjs.getSelectedNote() + if (!selected || !selected.elements[0].classList.contains('editable')) { + return + } + selected.elements.forEach(function(element) { + element.classList.add('editing') + }) +} + +txtjs.cancelEdit = function() { + let editing = Array.from(document.querySelectorAll('g.editing')) + editing.forEach(function(element) { + element.classList.remove('editing') + }) +} + +txtjs.createSVGElement = function(name) { + return document.createElementNS('http://www.w3.org/2000/svg', name) +} + +txtjs.editNote = function() { + let editing = Array.from(document.querySelectorAll('g.editing')) + let note = txtjs.getNoteFromSelection() + if (editing.length == 0 || !note) { + return + } + let id = txtjs.getNoteId(editing[0]) + note = Object.assign(Ox.getObjectById(txtjs.notes, id), { + position: note.position, + text: note.text + }) + editing.forEach(function(element) { + element.parentElement.removeChild(element) + }) + txtjs.renderNote(note) + getSelection().removeAllRanges() + txtjs.postMessage('editNote', { + id: id, + position: note.position, + text: note.text + }) + txtjs.selectNote(note.id) +} + +txtjs.getNewId = function() { + let ids = txtjs.notes.map(function(note) { + return note.id + }) + let i = 1 + while (ids.includes(Ox.encodeBase26(i))) { + i++ + } + return Ox.encodeBase26(i) +} + +txtjs.getNoteId = function(element) { + let classNames = Array.from(element.classList).filter(function(className) { + return className.startsWith('note-') + }) + if (classNames.length == 0) { + return + } + return classNames[0].substr(5) +} + +txtjs.getNoteFromSelection = function() { + let selection = getSelection() + try { + var range = selection.getRangeAt(0) + } catch(e) { + return + } + if (range.collapsed) { + return + } + let container = range.commonAncestorContainer + if (container.id != 'txt') { + while (container != document.body) { + container = container.parentElement + if (container.id == 'txt') { + break + } + } + } + if (container.id != 'txt') { + return + } + let position = txtjs.getPosition(range) + let pos = position.split(',').map(function(v) { + return parseInt(v) + }) + let note = { + id: txtjs.getNewId(), + position: position, + text: txtjs.text.substr(pos[0], pos[1] - pos[0]), + editable: true + } + if (txtjs.noteExists(note)) { + return + } + return note +} + +txtjs.getPosition = function(range) { + let container = document.querySelector('#txt') + let nodes = Array.from(container.childNodes) + let startNodeIndex = range.startContainer == container + ? range.startOffset : nodes.indexOf(range.startContainer) + let endNodeIndex = range.endContainer == container + ? range.endOffset : nodes.indexOf(range.endContainer) + let startOffset = range.startContainer == container ? 0 : range.startOffset + let endOffset = range.endContainer == container ? 0 : range.endOffset + let index = 0 + let start = 0 + let end = 0 + for (let i = 0; i <= endNodeIndex; i++) { + if (i == startNodeIndex) { + start = index + startOffset + } + if (i == endNodeIndex) { + end = index + endOffset + } + if (nodes[i].nodeType == 1) { //
+ index++ + } else { + index += nodes[i].textContent.length + } + } + while (' \n'.includes(txtjs.text.substr(start, 1))) { + start++ + } + while (' \n'.includes(txtjs.text.substr(end - 1, 1))) { + end-- + } + return start + ',' + end +} + +txtjs.getRange = function(id, start, end) { + let startContainer, startOffset, endContainer, endOffset + let container = document.querySelector('#' + id) + let nodes = Array.from(container.childNodes) + let index = 0 + for (let i = 0; i < nodes.length; i++) { + if (start < index + nodes[i].textContent.length && startOffset === void 0) { + startContainer = nodes[i] + startOffset = start - index + } + if (end <= index + nodes[i].textContent.length) { + endContainer = nodes[i] + endOffset = end - index + break + } + if (nodes[i].nodeType == 1) { //
+ index++ + } else { + index += nodes[i].textContent.length + } + } + let range = document.createRange() + range.setStart(startContainer, startOffset) + range.setEnd(endContainer, endOffset) + return range +} + +txtjs.getSelectedNote = function() { + let elements = Array.from(document.querySelectorAll('g.selected')) + if (elements.length == 0) { + return + } + let id = txtjs.getNoteId(elements[0]) + return Object.assign(Ox.getObjectById(txtjs.notes, id), { + elements: elements + }) +} + +txtjs.noteExists = function(note) { + return txtjs.notes.some(function(note_) { + return note_.position == note.position + }) +} + +txtjs.onMessage = function(data, event) { + console.log('onMessage', event, data) + if (event == 'selectAnnotation') { + txtjs.selectNote(data.id, false) + } else if (event == 'addAnnotation') { + txtjs.addNoteFromSelection() + } else if (event == 'addAnnotations') { + if (data.reset) { + // fixme + } + data.annotations.forEach(function(note) { + //// + note.position = note.position.replace(':', ',') + //// + txtjs.renderNote(note) + txtjs.notes.push(note) + }) + } else if (event == 'removeAnnotation') { + txtjs.selectNote(data.id) + txtjs.removeNote() + } +} + +txtjs.postMessage = function(action, data) { + console.log('postMessage', action, data) + Ox.$parent.postMessage(action.replace('Note', 'Annotation'), data) +} + +txtjs.removeNote = function() { + let selected = txtjs.getSelectedNote() + if (!selected) { + return + } + let id = txtjs.getNoteId(selected.elements[0]) + selected.elements.forEach(function(element) { + element.parentElement.removeChild(element) + }) + let index = txtjs.notes.map(function(note) { + return note.id + }).indexOf(id) + txtjs.notes.splice(index, 1) + txtjs.postMessage('removeNote', { + id: id + }) +} + +txtjs.renderNote = function(note) { + let pos = note.position.split(',').map(function(v) { + return parseInt(v) + }) + let ids = ['txt', 'txt-scroll'] + ids.forEach(function(id) { + let range = txtjs.getRange(id, pos[0], pos[1]) + let rects = Array.from(range.getClientRects()) + let size = rects.reduce(function(width, rect) { + return width + rect.width + }, 0) + let maxHeight = 8192 + let firstIndex = Math.floor((rects[0].top + window.pageYOffset) / maxHeight) + let lastIndex = Math.floor((rects[rects.length - 1].top + window.pageYOffset + rects[rects.length - 1].height) / maxHeight) + for (let index = firstIndex; index <= lastIndex; index++) { + let g = txtjs.createSVGElement('g') + g.classList.add('note-' + note.id) + g.classList.add('selectable') + if (note.editable) { + g.classList.add('editable') + } + g.setAttribute('data-size', size) + g.setAttribute('pointer-events', id == 'txt' ? 'all' : 'none') + rects.forEach(function(rect) { + let element = txtjs.createSVGElement('rect') + let x = id == 'txt' ? rect.left + : rect.left - document.querySelector('#scroll').getBoundingClientRect().left + 8 + let y = id == 'txt' ? rect.top + window.pageYOffset - index * maxHeight + : rect.top + document.querySelector('#scroll').scrollTop - 16 - index * maxHeight + element.setAttribute('x', x) + element.setAttribute('y', y) + element.setAttribute('width', rect.width) + element.setAttribute('height', rect.height) + g.appendChild(element) + }) + let svg = document.querySelector('svg#svg-' + id + '-' + index) + for (let i = 0; i < svg.children.length; i++) { + let childSize = parseInt(svg.children[i].getAttribute('data-size')) + if (size > childSize) { + svg.insertBefore(g, svg.children[i]) + break + } + } + if (!g.parentElement) { + svg.appendChild(g) + } + } + }) +} + +txtjs.renderSVG = function(id, index, width, height) { + function mousedown(e) { + svg.addEventListener('mouseup', mouseup) + timeout = setTimeout(function() { + svg.removeEventListener('mouseup', mouseup) + e.target.classList.remove('selectable') + e.target.setAttribute('pointer-events', 'none') + document.addEventListener('mouseup', function() { + e.target.classList.add('selectable') + e.target.setAttribute('pointer-events', 'all') + }) + }, 250) + } + function mouseup(e) { + clearTimeout(timeout) + txtjs.selectNote(txtjs.getNoteId(e.target.parentElement)) + } + let timeout + let svg = txtjs.createSVGElement('svg') + svg.setAttribute('id', 'svg-' + id + '-' + index) + svg.setAttribute('pointer-events', 'none') + if (id == 'txt') { + svg.style.left = 0 + } else { + svg.style.right = '8px' + } + svg.style.top = index * 8192 + 'px' + svg.style.width = width + 'px' + svg.style.height = height + 'px' + if (id == 'txt') { + svg.addEventListener('mousedown', mousedown) + } + let parentElement = id == 'txt' ? document.body : document.querySelector('#scroll') + parentElement.appendChild(svg) +} + +txtjs.renderSVGs = function() { + let maxHeight = 8192 + let ids = ['txt', 'txt-scroll'] + ids.forEach(function(id) { + let rect = document.querySelector('#' + id).getBoundingClientRect() + let lastHeight = rect.height % maxHeight + let n = Math.ceil(rect.height / maxHeight) + for (let i = 0; i < n; i++) { + txtjs.renderSVG(id, i, rect.width, i < n - 1 ? maxHeight : lastHeight) + } + }) +} + +txtjs.renderText = function(text) { + txtjs.text = text + html = text.replace(//g, '>') + html = html.replace().replace(/\r\n/g, '\n').replace(/[\r\n]/g, '
') + txtjs.html = Ox.encodeHTMLEntities(text).replace(/\r\n/g, '\n').replace(/[\r\n]/g, '
') + window.addEventListener('mouseup', onMouseup) + window.addEventListener('resize', onResize) + window.addEventListener('scroll', onScroll) + document.addEventListener('keydown', function(e) { + console.log(e.keyCode) + if (e.keyCode == 8 || e.keyCode == 46) { // BACKSPACE || DELETE + txtjs.removeNote() + } else if (e.keyCode == 13) { // ENTER + if (e.shiftKey) { + txtjs.beginEdit() + } else if (document.querySelector('g.editing')) { + txtjs.editNote() + } else { + txtjs.addNoteFromSelection() + } + } else if (e.keyCode == 27) { // ESCAPE + if (document.querySelector('g.editing')) { + txtjs.cancelEdit() + } + if (document.querySelector('g.selected')) { + txtjs.selectNote(null) + } + } else if (e.keyCode == 37) { // LEFT + txtjs.selectNextNote(-1) + } else if (e.keyCode == 39) { // RIGHT + txtjs.selectNextNote(1) + } + }) + let style = document.createElement('style') + style.innerText = [ + 'svg { mix-blend-mode: multiply; position: absolute }', + 'g { fill: rgb(255, 255, 192); fill-opacity: 0.5 }', + 'g.selectable { cursor: pointer }', + 'g.editable { fill: rgb(255, 255, 0) }', + 'g.selected { fill: rgb(224, 240, 255) }', + 'g.editable.selected { fill: rgb(128, 192, 255) }', + 'g.editable.editing { fill: rgb(128, 255, 128) }', + '::selection { background: rgb(192, 192, 192) }' + ].join('\n') + document.head.appendChild(style) + document.body.style.backgroundColor = 'rgb(255, 255, 255)' + document.body.style.margin = 0 + document.body.style.overflowX = 'hidden' + let textElement = document.createElement('div') + textElement.id = 'txt' + textElement.style.fontFamily = 'Georgia, Palatino, DejaVu Serif, Book Antiqua, Palatino Linotype, Times New Roman, serif', + textElement.style.fontSize = '20px' + textElement.style.lineHeight = '30px' + textElement.style.padding = '10% 20% 10% 10%' + textElement.innerHTML = txtjs.html + textElement.addEventListener('mousedown', function() { + txtjs.selectNote(null) + }) + document.body.appendChild(textElement) + let scrollElement = document.createElement('div') + scrollElement.id = 'scroll' + scrollElement.style.bottom = '16px' + scrollElement.style.overflow = 'hidden' + scrollElement.style.position = 'fixed' + scrollElement.style.right = '24px' + scrollElement.style.width = '7%' + scrollElement.style.top = '16px' + document.body.appendChild(scrollElement) + let scrollTextElement = document.createElement('div') + scrollTextElement.id = 'txt-scroll' + scrollTextElement.style.cursor = 'pointer' + scrollTextElement.style.fontFamily = 'Georgia, Palatino, DejaVu Serif, Book Antiqua, Palatino Linotype, Times New Roman, serif', + scrollTextElement.style.fontSize = '2px' + scrollTextElement.style.lineHeight = '3px' + scrollTextElement.style.MozUserSelect = 'none' + scrollTextElement.style.WebkitUserSelect = 'none' + scrollTextElement.innerHTML = txtjs.html + scrollTextElement.addEventListener('mousedown', function(e) { + let offset = 'offsetY' in e ? e.offsetY : e.layerY + document.documentElement.scrollTop = offset / factor + margin - 16 + }) + scrollElement.appendChild(scrollTextElement) + txtjs.renderSVGs() + let factor, margin + onResize() + function onMouseup() { + let note = txtjs.getNoteFromSelection() + if (!note || txtjs.noteExists(note)) { + txtjs.postMessage('selectText', null) + } else { + txtjs.postMessage('selectText', note) + } + } + function onResize() { + factor = scrollTextElement.clientHeight / textElement.clientHeight + margin = textElement.offsetWidth * 0.1 + setTimeout(function() { + Array.from(document.querySelectorAll('svg')).forEach(function(svg) { + svg.parentElement.removeChild(svg) + }) + txtjs.renderSVGs() + txtjs.mark(txtjs.notes) + }) + } + function onScroll() { + scrollElement.scrollTop = (window.pageYOffset - margin + 16) * factor + } +} + +txtjs.selectNextNote = function(direction) { + let selected = txtjs.getSelectedNote() + if (!selected) { + return + } + let id = txtjs.getNoteId(selected.elements[0]) + let ids = txtjs.notes.sort(function(a, b) { + return parseInt(a.position.split()[0]) - parseInt(b.position.split()[0]) + }).map(function(note) { + return note.id + }) + txtjs.selectNote(ids[Ox.mod(ids.indexOf(id) + direction, ids.length)]) +} + +txtjs.selectNote = function(id, trigger) { + let selected = txtjs.getSelectedNote() + if (selected) { + selected.elements.forEach(function(element) { + element.classList.remove('selected') + }) + } + if (id) { + let editing = Array.from(document.querySelectorAll('g.editing')) + if (editing.length && txtjs.getNoteId(editing[0]) != id) { + editing.forEach(function(element) { + element.classList.remove('editing') + }) + } + let elements = Array.from(document.querySelectorAll('g.note-' + id)) + elements.forEach(function(element) { + element.classList.add('selected') + }) + for (let i = 0; i < elements.length; i++) { + if (!elements[i].parentNode.id.includes('scroll')) { + elements[i].scrollIntoViewIfNeeded && elements[i].scrollIntoViewIfNeeded() + break + } + } + } + if (trigger !== false) { + txtjs.postMessage('selectNote', {id: id}) + } +}