fulltext search in macosx

for update
compare NFC
2019-01-15 14:08:42 +05:30 · 2019-01-15 13:20:11 +05:30 · 2019-01-14 20:32:34 +05:30 · 2019-01-14 18:15:27 +05:30
10 changed files with 80 additions and 14 deletions
--- a/config.json
+++ b/config.json
@ -241,6 +241,12 @@
            "format": {"type": "boolean", "args": []},
            "sort": true
        },
        {
            "id": "fulltext",
            "title": "Full Text",
            "find": true,
            "type": "text"
        },
        {
            "id": "random",
            "title": "Random",
--- a/oml/fulltext.py
+++ b/oml/fulltext.py
@ -0,0 +1,28 @@
 import logging
 import os
 import subprocess
 import sys
 logger = logging.getLogger(__name__)
 def find_fulltext_macos(query):
    """Return the sha1 values of library files whose content matches *query*.

    Runs ``mdfind`` restricted to the ``Books/`` folder below the configured
    library path, strips that folder prefix from every hit and looks the
    remaining relative paths up in the ``File`` table.

    Raises ``subprocess.CalledProcessError`` if ``mdfind`` exits non-zero.
    """
    # Imported lazily, as in the original; presumably to avoid an import
    # cycle with settings -- TODO confirm.
    import settings
    from item.models import File
    prefix = os.path.join(os.path.expanduser(settings.preferences['libraryPath']), 'Books/')
    cmd = ["mdfind", "-onlyin", prefix, query]
    output = subprocess.check_output(cmd).decode().strip()
    # An empty result would otherwise turn into [''] and be sent to the
    # database as a bogus path, so blank lines are dropped here.
    books = [path[len(prefix):] for path in output.split('\n') if path]
    if not books:
        return []
    # `operators` is not imported in this module; the column's own in_()
    # builds the same IN clause without needing that name.
    ids = [b[0] for b in File.query.filter(File.path.in_(books)).values('sha1')]
    return ids
 def find_fulltext(query):
    """Run a full text search using the backend for the current platform.

    Returns whatever the platform backend returns, or an empty list (after
    writing a debug log entry) when no backend exists for ``sys.platform``.
    """
    if sys.platform != 'darwin':
        logger.debug('missing fulltext search implementation for %s', sys.platform)
        return []
    return find_fulltext_macos(query)
 def platform_supported():
    """Tell whether full text search is implemented for ``sys.platform``."""
    supported_platforms = ('darwin',)
    return sys.platform in supported_platforms
--- a/oml/item/api.py
+++ b/oml/item/api.py
@ -3,6 +3,7 @@
 import json
 import hashlib
 import os
 import unicodedata
 from sqlalchemy.orm import load_only
 from sqlalchemy.sql.expression import text
@ -145,13 +146,15 @@ def edit(data):
        ids = [ids]
    edited = []
    for id in ids:
-        item = models.Item.get(id)
+        state.db.session.begin(subtransactions=True)
        item = models.Item.get(id, for_update=True)
        if item and item.json().get('mediastate') == 'available':
            item.edit(data)
            response = item.json()
            edited.append(id)
        else:
            logger.info('can only edit available items %s', id)
        state.db.session.commit()
    if len(ids) > 1:
        response = data
        response['id'] = edited
@ -211,6 +214,7 @@ def autocomplete(data):
    qs = qs.filter(models.Find.item_id.in_(items))
    if data['value']:
        value = data['value'].lower()
        value = unicodedata.normalize('NFKD', value)
        qs = qs.filter(models.Find.key.is_(data['key']))
        if op == '=':
            qs = qs.filter(models.Find.findvalue.contains(value))
--- a/oml/item/models.py
+++ b/oml/item/models.py
@ -23,7 +23,7 @@ from .icons import icons
 from .person import get_sort_name, Person
 from queryparser import Parser
 from settings import config
-from utils import remove_empty_folders, get_ratio
+from utils import remove_empty_folders, get_ratio, same_path
 from websocket import trigger_event
 import db
 import media
@ -75,10 +75,13 @@ class Item(db.Model):
        self.meta = {}
    @classmethod
-    def get(cls, id):
+    def get(cls, id, for_update=False):
        if isinstance(id, list):
            id = base64.b32encode(hashlib.sha1(''.join(id)).digest())
-        return cls.query.filter_by(id=id).first()
+        qs = cls.query.filter_by(id=id)
        if for_update:
            qs = qs.with_for_update()
        return qs.first()
    @classmethod
    def get_or_create(cls, id, info=None):
@ -788,7 +791,7 @@ class File(db.Model):
            new_path = os.path.join(first, author, filename)
            if current_path == os.path.join(prefix, new_path):
                break
-        if unicodedata.normalize('NFD', self.path) != unicodedata.normalize('NFD', new_path):
+        if not same_path(self.path, new_path):
            path = os.path.join(prefix, new_path)
            ox.makedirs(os.path.dirname(path))
            mode = 0o644
--- a/oml/item/scan.py
+++ b/oml/item/scan.py
@ -14,7 +14,7 @@ import ox
 from changelog import add_record
 from item.models import File, Item
 from user.models import List
-from utils import remove_empty_folders
+from utils import remove_empty_folders, same_path
 from websocket import trigger_event
 import db
 import media
@ -50,7 +50,7 @@ def remove_missing(books=None):
            if dirty:
                state.db.session.commit()
                dirty = False
-            nfd_books = {unicodedata.normalize('NFD', path) for path in nfd_books}
+            nfd_books = {unicodedata.normalize('NFD', path) for path in books}
            removed = [
                path for path in db_paths
                if unicodedata.normalize('NFD', path) not in nfd_books
@ -132,8 +132,6 @@ def collect_books(prefix, status=None):
    logger.debug('found %s books', len(books))
    return books
 def nfd_same(f1, f2):
    """Return True if *f1* and *f2* are equal once both are NFD-normalized."""
    decomposed_1 = unicodedata.normalize('NFD', f1)
    decomposed_2 = unicodedata.normalize('NFD', f2)
    return decomposed_1 == decomposed_2
 def run_scan():
    logger.debug('run_scan')
@ -155,7 +153,7 @@ def run_scan():
                if file:
                    f1 = file.fullpath()
                    f2 = os.path.join(prefix, f)
-                    if not nfd_same(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
+                    if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
                        logger.debug('file exists in multiple locations %s', id)
                        logger.debug('"%s" vs "%s"', f1, f2)
                        os.chmod(f2, stat.S_IWRITE)
@ -166,7 +164,7 @@ def run_scan():
                    if file:
                        f1 = file.fullpath()
                        f2 = os.path.join(prefix, f)
-                        if not nfd_same(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
+                        if not same_path(f1, f2) and os.path.exists(f1) and os.path.exists(f2):
                            logger.debug('"%s" vs "%s"', f1, f2)
                            os.chmod(f2, stat.S_IWRITE)
                            os.unlink(f2)
--- a/oml/media/pdf.py
+++ b/oml/media/pdf.py
@ -209,7 +209,7 @@ def info(pdf):
    if settings.server['extract_text']:
        text = extract_text(pdf)
        data['textsize'] = len(text)
-        if not 'isbn' in data:
+        if 'isbn' not in data:
            isbn = extract_isbn(text)
            if isbn:
                data['isbn'] = isbn
--- a/oml/queryparser.py
+++ b/oml/queryparser.py
@ -9,6 +9,7 @@ from sqlalchemy.sql.expression import text
 import utils
 import settings
 from fulltext import find_fulltext
 import logging
 logger = logging.getLogger(__name__)
@ -25,7 +26,7 @@ def get_operator(op, type='str'):
            '$': operators.endswith_op,
            '&': operators.in_op,
        },
-        'int':  {
+        'int': {
            '==': operators.eq,
            '>': operators.gt,
            '>=': operators.ge,
@ -65,7 +66,7 @@ class Parser(object):
        ...
        '''
        #logger.debug('parse_condition %s', condition)
-        if not 'value' in condition:
+        if 'value' not in condition:
            return None
        k = condition.get('key', '*')
        if not k:
@ -122,6 +123,18 @@ class Parser(object):
            in_op = operators.notin_op if exclude else operators.in_op
            q = in_op(self._model.id, ids)
            return q
        elif k == 'fulltext':
            ids = find_fulltext(v)
            if ids:
                in_op = operators.notin_op if exclude else operators.in_op
                q = in_op(self._model.id, ids)
            else:
                # nothing
                q = operators.eq(self._model.id, -1)
                if exclude:
                    q = ~q
            return q
        elif key_type in ("string", "text"):
            if isinstance(v, str):
                v = unicodedata.normalize('NFKD', v).lower()
--- a/oml/settings.py
+++ b/oml/settings.py
@ -6,6 +6,7 @@ import os
 from oml.pdict import pdict
 from oml.utils import get_user_id
 from oml import fulltext
 base_dir = os.path.normpath(os.path.join(os.path.abspath(os.path.dirname(__file__)), '..'))
 static_path = os.path.join(base_dir, 'static')
@ -90,3 +91,10 @@ DEBUG_HTTP = server.get('debug_http', False)
 DEBUG_API = server.get('debug_api', False)
 DB_VERSION = 13
 FULLTEXT_SUPPORT = fulltext.platform_supported()
 if not FULLTEXT_SUPPORT:
    config['itemKeys'] = [k for k in config['itemKeys'] if k['id'] != 'fulltext']
--- a/oml/user/api.py
+++ b/oml/user/api.py
@ -35,6 +35,8 @@ def init(data):
    if os.path.exists(settings.oml_data_path):
        with open(settings.oml_data_path) as fd:
            config = json.load(fd)
            if not settings.FULLTEXT_SUPPORT:
                config['itemKeys'] = [k for k in config['itemKeys'] if k['id'] != 'fulltext']
    else:
        config = {}
    response['config'] = config
--- a/oml/utils.py
+++ b/oml/utils.py
@ -15,6 +15,7 @@ import stdnum.isbn
 import subprocess
 import sys
 import time
 import unicodedata
 import ox
 from OpenSSL.crypto import (
@ -462,3 +463,6 @@ def iexists(path):
        return False
    files = {os.path.basename(f).lower() for f in files}
    return name in files
 def same_path(f1, f2):
    """Return True if *f1* and *f2* are equal once both are NFC-normalized.

    Lets a composed and a decomposed spelling of the same name compare
    as the same path.
    """
    composed_1 = unicodedata.normalize('NFC', f1)
    composed_2 = unicodedata.normalize('NFC', f2)
    return composed_1 == composed_2
Author	SHA1	Message	Date
j	1c8a5c3764	fulltext search in macosx	2019-01-15 14:08:42 +05:30
j	6c7d6bb6b0	for update	2019-01-15 13:20:11 +05:30
j	52f45beaec	compare NFC	2019-01-14 20:32:34 +05:30
j	88f9f2d27e	fix path	2019-01-14 18:15:27 +05:30