openmedialibrary/oml/fulltext.py

66 lines
1.9 KiB
Python

import logging
import os
import subprocess
import sys
from urllib.parse import quote, unquote
from sqlalchemy.sql import operators
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def get_prefix():
    """Return the path of the ``Books/`` directory inside the library.

    The library location is read from ``settings.preferences['libraryPath']``
    and a leading ``~`` is expanded. The returned path ends with ``Books/``.
    """
    # Imported at call time rather than at module load
    # (presumably to avoid an import cycle -- TODO confirm).
    import settings

    library_root = os.path.expanduser(settings.preferences['libraryPath'])
    return os.path.join(library_root, 'Books/')
def get_ids(books):
    """Return the ``sha1`` values of all ``File`` rows matching the paths.

    books: list of paths compared against ``File.path``.

    Returns a list with one ``sha1`` per matching row, or an empty list
    when ``books`` is empty.
    """
    from item.models import File

    if books:
        path_matches = operators.in_op(File.path, books)
        rows = File.query.filter(path_matches).values('sha1')
        # Each row carries a single column, the sha1.
        return [row[0] for row in rows]
    return []
def find_fulltext_macos(query):
    """Search the contents of the books with Spotlight (``mdfind``).

    Runs ``mdfind -onlyin <Books directory> <query>`` and maps the reported
    paths, made relative to the Books directory, to ids via ``get_ids``.

    query: search string, passed to ``mdfind`` unchanged.

    Returns the list of ids produced by ``get_ids``; empty when nothing
    matched.

    Raises ``subprocess.CalledProcessError`` if ``mdfind`` exits with a
    non-zero status.
    """
    prefix = get_prefix()
    cmd = ["mdfind", "-onlyin", prefix, query]
    lines = subprocess.check_output(cmd).decode().strip().split('\n')
    # An empty result decodes to '' and ''.split('\n') is [''], which used to
    # reach get_ids() as a bogus empty path. Keeping only lines that really
    # start with the prefix drops that entry and avoids slicing unrelated
    # text into a garbage path.
    books = [path[len(prefix):] for path in lines if path.startswith(prefix)]
    return get_ids(books)
def find_fulltext_windows(query):
    """Search the contents of the books with ``findstr``.

    Runs ``findstr`` inside the Books directory and maps the names of the
    matching files to ids via ``get_ids``.

    query: literal search string (matched case-insensitively).

    Returns the list of ids produced by ``get_ids``; empty when nothing
    matched.

    Raises ``subprocess.CalledProcessError`` if ``findstr`` fails with an
    exit status other than the "no match" status 1.
    """
    prefix = os.path.normpath(get_prefix())
    # /s recurse into subdirectories, /i ignore case, /m print only the names
    # of files containing a match, /c: treat the query as one literal string.
    # Running with cwd=prefix makes the reported names relative to the Books
    # directory, so no "C:\..." prefix has to be parsed out of the output.
    cmd = ['findstr', '/s', '/i', '/m', '/c:' + query, '*']
    try:
        output = subprocess.check_output(cmd, cwd=prefix).decode()
    except subprocess.CalledProcessError as error:
        # findstr signals "no match" through exit status 1.
        if error.returncode == 1:
            return []
        raise
    # splitlines() also copes with the '\r\n' line endings used on Windows.
    books = [line.strip() for line in output.splitlines() if line.strip()]
    # NOTE(review): findstr reports paths with backslashes -- confirm that
    # File.path uses the same separator on Windows.
    return get_ids(books)
def find_fulltext_linux(query):
    """Search the contents of the books with the ``tracker`` full text index.

    Runs ``tracker sparql -q`` with an ``fts:match`` query restricted to
    files below the Books directory and maps the reported ``file://`` URLs,
    made relative to the Books directory, to ids via ``get_ids``.

    query: search string; it is escaped before being embedded in the SPARQL
        string literal.

    Returns the list of ids produced by ``get_ids``; empty when nothing
    matched.

    Raises ``subprocess.CalledProcessError`` if ``tracker`` exits with a
    non-zero status.
    """
    prefix = get_prefix()
    prefix_url = quote(prefix)
    # The query ends up inside a single-quoted SPARQL literal: escape
    # backslashes first, then quotes and line breaks, so user input can
    # neither break the statement nor inject SPARQL of its own.
    escaped_query = (
        query.replace('\\', '\\\\')
        .replace("'", "\\'")
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    cmd = [
        'tracker',
        'sparql',
        '-q',
        "SELECT nie:url(?f) WHERE { ?f fts:match '%s' FILTER (tracker:uri-is-descendant ('file://%s', nie:url (?f))) }" % (escaped_query, prefix_url)
    ]
    output = subprocess.check_output(cmd).decode()
    scheme = 'file://'
    books = []
    for line in output.strip().split('\n'):
        url = line.strip()
        # Skip everything in the output that is not a result URL.
        if not url.startswith(scheme):
            continue
        # Remove only the leading scheme; a blanket replace() would also eat
        # a 'file://' that happens to occur inside the path itself.
        path = unquote(url[len(scheme):])
        if path.startswith(prefix):
            books.append(path[len(prefix):])
    return get_ids(books)
def find_fulltext(query):
    """Run a full text search with the backend for the current platform.

    query: search string handed to the platform specific implementation.

    Returns the ids found by ``find_fulltext_macos`` (``darwin``) or
    ``find_fulltext_linux`` (``linux``). On any other platform a debug
    message is logged and an empty list is returned.
    """
    platform = sys.platform
    if platform == 'darwin':
        return find_fulltext_macos(query)
    if platform == 'linux':
        return find_fulltext_linux(query)
    logger.debug('missing fulltext search implementation for %s', platform)
    return []
def platform_supported():
    """Tell whether ``sys.platform`` is ``darwin`` or ``linux``."""
    current = sys.platform
    return current == 'darwin' or current == 'linux'