openmedialibrary/oml/fulltext.py

66 lines
1.9 KiB
Python
Raw Permalink Normal View History

2019-01-15 08:38:42 +00:00
import logging
import os
import subprocess
import sys
2019-02-12 08:27:27 +00:00
from urllib.parse import quote, unquote
2019-01-15 08:38:42 +00:00
2019-01-15 08:43:52 +00:00
from sqlalchemy.sql import operators
2019-01-15 08:38:42 +00:00
logger = logging.getLogger(__name__)
2019-01-29 13:41:51 +00:00
def get_prefix():
    """Return the path of the ``Books/`` directory inside the library.

    The library location is read from ``settings.preferences['libraryPath']``
    and run through ``os.path.expanduser`` before ``'Books/'`` is joined onto
    it, so the returned string ends with a slash.
    """
    # Function-scope import, as in the original implementation.
    import settings

    library_root = os.path.expanduser(settings.preferences['libraryPath'])
    return os.path.join(library_root, 'Books/')
def get_ids(books):
    """Return the ``sha1`` values of the files whose path is in ``books``.

    ``books`` is a collection of values compared against ``File.path``.
    The result is a list holding the first column (``sha1``) of every
    matching row, or an empty list when ``books`` is empty.
    """
    from item.models import File

    if books:
        path_matches = operators.in_op(File.path, books)
        rows = File.query.filter(path_matches).values('sha1')
        return [row[0] for row in rows]
    # Nothing to look up, so the database is not queried at all.
    return []
def find_fulltext_macos(query):
    """Run a full-text search with ``mdfind`` and return the matching ids.

    Executes ``mdfind -onlyin <prefix> <query>`` where ``<prefix>`` comes from
    ``get_prefix()``, drops the first ``len(prefix)`` characters of every
    output line, and passes the remaining strings to ``get_ids``.

    Raises whatever ``subprocess.check_output`` raises when the command
    cannot be run or exits with a non-zero status.
    """
    library_prefix = get_prefix()
    raw_output = subprocess.check_output(
        ["mdfind", "-onlyin", library_prefix, query]
    )
    skip = len(library_prefix)
    relative_paths = []
    for line in raw_output.decode().strip().split('\n'):
        # Cut the library prefix so only the part after it is looked up.
        relative_paths.append(line[skip:])
    return get_ids(relative_paths)
def find_fulltext_windows(query):
    """Run a full-text search with ``findstr`` and return the matching ids.

    Executes ``findstr /i /c:<query> /d:<prefix>`` where ``<prefix>`` is the
    normalised result of ``get_prefix()``. The text before the first ``:`` of
    every output line is taken as a book path and handed to ``get_ids``.

    Raises whatever ``subprocess.check_output`` raises when the command
    cannot be run or exits with a non-zero status.
    """
    prefix = os.path.normpath(get_prefix())
    cmd = ['findstr', '/i', '/c:' + query, '/d:' + prefix]
    lines = subprocess.check_output(cmd).decode().strip().split('\n')
    # NOTE(review): this assumes each output line starts with a path that
    # matches File.path, followed by ':' -- confirm against real findstr
    # output before relying on this function.
    books = [line.split(':')[0] for line in lines]
    # get_ids takes only the list of paths; passing the prefix as a second
    # argument raised TypeError on every call.
    return get_ids(books)
2019-01-15 08:38:42 +00:00
2019-02-12 08:06:45 +00:00
def find_fulltext_linux(query):
    """Run a full-text search with ``tracker sparql`` and return matching ids.

    Builds a SPARQL query that matches ``query`` via ``fts:match`` and limits
    results to URLs below ``file://<quoted prefix>``, where the prefix comes
    from ``get_prefix()``. Output lines starting with ``file://`` are
    unquoted, stripped of ``file://`` and of the first ``len(prefix)``
    characters, and the remaining strings are passed to ``get_ids``.

    Raises whatever ``subprocess.check_output`` raises when the command
    cannot be run or exits with a non-zero status.
    """
    prefix = get_prefix()
    prefix_url = quote(prefix)
    # The search text lands inside a single-quoted SPARQL string literal.
    # Backslash, apostrophe and line breaks are not allowed there unescaped;
    # without this an apostrophe in the query ends the literal early and
    # breaks (or alters) the statement.
    escaped_query = query.translate(str.maketrans({
        '\\': '\\\\',
        "'": "\\'",
        '\n': '\\n',
        '\r': '\\r',
    }))
    sparql = (
        "SELECT nie:url(?f) WHERE { ?f fts:match '%s' "
        "FILTER (tracker:uri-is-descendant ('file://%s', nie:url (?f))) }"
        % (escaped_query, prefix_url)
    )
    cmd = [
        'tracker',
        'sparql',
        '-q',
        sparql,
    ]
    lines = subprocess.check_output(cmd).decode().strip().split('\n')
    books = [
        unquote(r.strip()).replace('file://', '')[len(prefix):]
        # Only lines that start with file:// are treated as result URLs.
        for r in lines if r.strip().startswith('file://')
    ]
    return get_ids(books)
2019-02-12 08:06:45 +00:00
2019-01-15 08:38:42 +00:00
def find_fulltext(query):
    """Dispatch a full-text search to the implementation for this platform.

    Uses ``find_fulltext_macos`` when ``sys.platform`` is ``'darwin'`` and
    ``find_fulltext_linux`` when it is ``'linux'``. On any other platform a
    debug message is logged and an empty list is returned.
    """
    platform = sys.platform
    if platform == 'darwin':
        return find_fulltext_macos(query)
    if platform == 'linux':
        return find_fulltext_linux(query)
    logger.debug('missing fulltext search implementation for %s', platform)
    return []
def platform_supported():
    """Return True when ``sys.platform`` is ``'darwin'`` or ``'linux'``."""
    current = sys.platform
    return current == 'darwin' or current == 'linux'