From 88f9f2d27e20f3d9a962c80182277ca46cc7409c Mon Sep 17 00:00:00 2001 From: j Date: Mon, 14 Jan 2019 18:15:27 +0530 Subject: [PATCH 1/4] fix path --- oml/item/scan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oml/item/scan.py b/oml/item/scan.py index 821cec4..8001989 100644 --- a/oml/item/scan.py +++ b/oml/item/scan.py @@ -50,7 +50,7 @@ def remove_missing(books=None): if dirty: state.db.session.commit() dirty = False - nfd_books = {unicodedata.normalize('NFD', path) for path in nfd_books} + nfd_books = {unicodedata.normalize('NFD', path) for path in books} removed = [ path for path in db_paths if unicodedata.normalize('NFD', path) not in nfd_books From 52f45beaec7a2d8ee37e6104d658bedb609fd1ca Mon Sep 17 00:00:00 2001 From: j Date: Mon, 14 Jan 2019 20:32:34 +0530 Subject: [PATCH 2/4] compare NFC --- oml/item/api.py | 2 ++ oml/item/models.py | 4 ++-- oml/item/scan.py | 8 +++----- oml/utils.py | 4 ++++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/oml/item/api.py b/oml/item/api.py index 4493233..6385e7f 100644 --- a/oml/item/api.py +++ b/oml/item/api.py @@ -3,6 +3,7 @@ import json import hashlib import os +import unicodedata from sqlalchemy.orm import load_only from sqlalchemy.sql.expression import text @@ -211,6 +212,7 @@ def autocomplete(data): qs = qs.filter(models.Find.item_id.in_(items)) if data['value']: value = data['value'].lower() + value = unicodedata.normalize('NFKD', value) qs = qs.filter(models.Find.key.is_(data['key'])) if op == '=': qs = qs.filter(models.Find.findvalue.contains(value)) diff --git a/oml/item/models.py b/oml/item/models.py index 959979a..f936838 100644 --- a/oml/item/models.py +++ b/oml/item/models.py @@ -23,7 +23,7 @@ from .icons import icons from .person import get_sort_name, Person from queryparser import Parser from settings import config -from utils import remove_empty_folders, get_ratio +from utils import remove_empty_folders, get_ratio, same_path from websocket import trigger_event import db import media @@ -788,7 +788,7 @@ class File(db.Model): new_path = os.path.join(first, author, filename) if current_path == os.path.join(prefix, new_path): break - if unicodedata.normalize('NFD', self.path) != unicodedata.normalize('NFD', new_path): + if not same_path(self.path, new_path): path = os.path.join(prefix, new_path) ox.makedirs(os.path.dirname(path)) mode = 0o644 diff --git a/oml/item/scan.py b/oml/item/scan.py index 8001989..c8c7a5a 100644 --- a/oml/item/scan.py +++ b/oml/item/scan.py @@ -14,7 +14,7 @@ import ox from changelog import add_record from item.models import File, Item from user.models import List -from utils import remove_empty_folders +from utils import remove_empty_folders, same_path from websocket import trigger_event import db import media @@ -132,8 +132,6 @@ def collect_books(prefix, status=None): logger.debug('found %s books', len(books)) return books -def nfd_same(f1, f2): - return unicodedata.normalize('NFD', f1) == unicodedata.normalize('NFD', f2) def run_scan(): logger.debug('run_scan') @@ -155,7 +153,7 @@ def run_scan(): if file: f1 = file.fullpath() f2 = os.path.join(prefix, f) - if not nfd_same(f1, f2) and os.path.exists(f1) and os.path.exists(f2): + if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2): logger.debug('file exists in multiple locations %s', id) logger.debug('"%s" vs "%s"', f1, f2) os.chmod(f2, stat.S_IWRITE) @@ -166,7 +164,7 @@ def run_scan(): if file: f1 = file.fullpath() f2 = os.path.join(prefix, f) - if not nfd_same(f1, f2) and os.path.exists(f1) and os.path.exists(f2): + if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2): logger.debug('"%s" vs "%s"', f1, f2) os.chmod(f2, stat.S_IWRITE) os.unlink(f2) diff --git a/oml/utils.py b/oml/utils.py index 99433ba..7a2047f 100644 --- a/oml/utils.py +++ b/oml/utils.py @@ -15,6 +15,7 @@ import stdnum.isbn import subprocess import sys import time +import unicodedata import ox from OpenSSL.crypto import ( @@ -462,3 +463,6 @@ def iexists(path): return False files = {os.path.basename(f).lower() for f in files} return name in files + +def same_path(f1, f2): + return unicodedata.normalize('NFC', f1) == unicodedata.normalize('NFC', f2) From 6c7d6bb6b084aef666c54c1d0b9f5a09dcee9035 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 15 Jan 2019 13:20:11 +0530 Subject: [PATCH 3/4] for update --- oml/item/api.py | 4 +++- oml/item/models.py | 7 +++++-- oml/media/pdf.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/oml/item/api.py b/oml/item/api.py index 6385e7f..857dcf9 100644 --- a/oml/item/api.py +++ b/oml/item/api.py @@ -146,13 +146,15 @@ def edit(data): ids = [ids] edited = [] for id in ids: - item = models.Item.get(id) + state.db.session.begin(subtransactions=True) + item = models.Item.get(id, for_update=True) if item and item.json().get('mediastate') == 'available': item.edit(data) response = item.json() edited.append(id) else: logger.info('can only edit available items %s', id) + state.db.session.commit() if len(ids) > 1: response = data response['id'] = edited diff --git a/oml/item/models.py b/oml/item/models.py index f936838..b2d5c3a 100644 --- a/oml/item/models.py +++ b/oml/item/models.py @@ -75,10 +75,13 @@ class Item(db.Model): self.meta = {} @classmethod - def get(cls, id): + def get(cls, id, for_update=False): if isinstance(id, list): id = base64.b32encode(hashlib.sha1(''.join(id)).digest()) - return cls.query.filter_by(id=id).first() + qs = cls.query.filter_by(id=id) + if for_update: + qs = qs.with_for_update() + return qs.first() @classmethod def get_or_create(cls, id, info=None): diff --git a/oml/media/pdf.py b/oml/media/pdf.py index fdf5a01..c4e9719 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -209,7 +209,7 @@ def info(pdf): if settings.server['extract_text']: text = extract_text(pdf) data['textsize'] = len(text) - if not 'isbn' in data: + if 'isbn' not in data: isbn = extract_isbn(text) if isbn: data['isbn'] = isbn From 1c8a5c3764d9637b877f498ac248dacc4bf59b44 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 15 Jan 2019 14:08:42 +0530 Subject: [PATCH 4/4] fulltext search in macosx --- config.json | 6 ++++++ oml/fulltext.py | 28 ++++++++++++++++++++++++++++ oml/queryparser.py | 17 +++++++++++++++-- oml/settings.py | 8 ++++++++ oml/user/api.py | 2 ++ 5 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 oml/fulltext.py diff --git a/config.json b/config.json index 813dff5..6c410b3 100644 --- a/config.json +++ b/config.json @@ -241,6 +241,12 @@ "format": {"type": "boolean", "args": []}, "sort": true }, + { + "id": "fulltext", + "title": "Full Text", + "find": true, + "type": "text" + }, { "id": "random", "title": "Random", diff --git a/oml/fulltext.py b/oml/fulltext.py new file mode 100644 index 0000000..579427d --- /dev/null +++ b/oml/fulltext.py @@ -0,0 +1,28 @@ +import logging +import os +import subprocess +import sys + + +logger = logging.getLogger(__name__) + +def find_fulltext_macos(query): + import settings + from item.models import File + prefix = os.path.join(os.path.expanduser(settings.preferences['libraryPath']), 'Books/') + cmd = ["mdfind", "-onlyin", prefix, query] + books = subprocess.check_output(cmd).decode().strip().split('\n') + books = [path[len(prefix):] for path in books] + ids = [b[0] for b in File.query.filter(operators.in_op(File.path, books)).values('sha1')] + return ids + +def find_fulltext(query): + ids = [] + if sys.platform == 'darwin': + ids = find_fulltext_macos(query) + else: + logger.debug('missing fulltext search implementation for %s', sys.platform) + return ids + +def platform_supported(): + return sys.platform == 'darwin' diff --git a/oml/queryparser.py b/oml/queryparser.py index 6ad4a5d..27acd42 100644 --- a/oml/queryparser.py +++ b/oml/queryparser.py @@ -9,6 +9,7 @@ from sqlalchemy.sql.expression import text import utils import settings +from fulltext import find_fulltext import logging logger = logging.getLogger(__name__) @@ -25,7 +26,7 @@ def get_operator(op, type='str'): '$': operators.endswith_op, '&': operators.in_op, }, - 'int': { + 'int': { '==': operators.eq, '>': operators.gt, '>=': operators.ge, @@ -65,7 +66,7 @@ class Parser(object): ... ''' #logger.debug('parse_condition %s', condition) - if not 'value' in condition: + if 'value' not in condition: return None k = condition.get('key', '*') if not k: @@ -122,6 +123,18 @@ class Parser(object): in_op = operators.notin_op if exclude else operators.in_op q = in_op(self._model.id, ids) return q + elif k == 'fulltext': + ids = find_fulltext(v) + if ids: + in_op = operators.notin_op if exclude else operators.in_op + q = in_op(self._model.id, ids) + else: + # nothing + q = operators.eq(self._model.id, -1) + if exclude: + q = ~q + return q + elif key_type in ("string", "text"): if isinstance(v, str): v = unicodedata.normalize('NFKD', v).lower() diff --git a/oml/settings.py b/oml/settings.py index c66722f..345182a 100644 --- a/oml/settings.py +++ b/oml/settings.py @@ -6,6 +6,7 @@ import os from oml.pdict import pdict from oml.utils import get_user_id +from oml import fulltext base_dir = os.path.normpath(os.path.join(os.path.abspath(os.path.dirname(__file__)), '..')) static_path = os.path.join(base_dir, 'static') @@ -90,3 +91,10 @@ DEBUG_HTTP = server.get('debug_http', False) DEBUG_API = server.get('debug_api', False) DB_VERSION = 13 + + +FULLTEXT_SUPPORT = fulltext.platform_supported() + +if not FULLTEXT_SUPPORT: + config['itemKeys'] = [k for k in config['itemKeys'] if k['id'] != 'fulltext'] + diff --git a/oml/user/api.py b/oml/user/api.py index 242b5fa..63216e9 100644 --- a/oml/user/api.py +++ b/oml/user/api.py @@ -35,6 +35,8 @@ def init(data): if os.path.exists(settings.oml_data_path): with open(settings.oml_data_path) as fd: config = json.load(fd) + if not settings.FULLTEXT_SUPPORT: + config['itemKeys'] = [k for k in config['itemKeys'] if k['id'] != 'fulltext'] else: config = {} response['config'] = config