From 853f6cdc0c02338bca4bb55448a5cd4975889f1e Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 16:43:05 +0530 Subject: [PATCH 1/6] only scan once per day --- oml/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oml/tasks.py b/oml/tasks.py index 3f97379..93d3b45 100644 --- a/oml/tasks.py +++ b/oml/tasks.py @@ -36,7 +36,10 @@ class Tasks(Thread): def run(self): self.load_tasks() - self.queue('scan') + if time.mktime(time.gmtime()) - settings.server.get('last_scan', 0) > 24*60*60: + settings.server['last_scan'] = time.mktime(time.gmtime()) + self.queue('scan') + import item.scan from item.models import sync_metadata, get_preview, get_cover from user.models import ( From 1421e402e13e07a4c426dd0430b7c16a1afa01a4 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 16:43:20 +0530 Subject: [PATCH 2/6] PEP8 --- oml/media/epub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oml/media/epub.py b/oml/media/epub.py index cd5a6f0..552a134 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -153,7 +153,7 @@ def info(epub): contents.append(txt) if contents: data['tableofcontents'] = '\n'.join(contents).strip() - if not 'tableofcontents' in data: + if 'tableofcontents' not in data: guide = info.find('{http://www.idpf.org/2007/opf}guide') if guide: for ref in guide.findall('{http://www.idpf.org/2007/opf}reference'): @@ -169,7 +169,7 @@ def info(epub): data['description'] = strip_tags(decode_html(data['description'])) text = extract_text(epub) data['textsize'] = len(text) - if not 'isbn' in data: + if 'isbn' not in data: isbn = extract_isbn(text) if isbn: data['isbn'] = isbn From e623307d190bea5e8110b44aa5bf8cc9a145f088 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 16:43:30 +0530 Subject: [PATCH 3/6] paths with spaces --- ctl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ctl b/ctl index da49820..8195061 100755 --- a/ctl +++ b/ctl @@ -56,7 +56,7 @@ else ARCH="_armv7l" fi PLATFORM="linux$ARCH" - if [ -e $BASE/platform_${PLATFORM}/lib/libunrar.so ]; then + if [ -e "$BASE/platform_${PLATFORM}/lib/libunrar.so" ]; then export UNRAR_LIB_PATH="$BASE/platform_${PLATFORM}/lib/libunrar.so" fi fi From debe3823e2a83ef5d624786a8dc34d222c0cf475 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 16:43:55 +0530 Subject: [PATCH 4/6] only try to download if we have a url --- oml/item/models.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/oml/item/models.py b/oml/item/models.py index a2b200a..72d6b8e 100644 --- a/oml/item/models.py +++ b/oml/item/models.py @@ -843,15 +843,17 @@ def download_cover(id): url = i.meta.get('cover') else: url = None + cover = None logger.debug('download cover %s %s', id, url) ratio = None - try: - cover = ox.net.read_url(url) - ratio = get_ratio(cover) - except: - logger.debug('unable to read cover url %s', url) - cover = None + if url: + try: + cover = ox.net.read_url(url) + ratio = get_ratio(cover) + except: + logger.debug('unable to read cover url %s', url) + cover = None with db.session(): i = Item.get(id, for_update=True) if i: From c38d3a8b3523b0fff74d9a4456887f22f5087671 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 17:36:03 +0530 Subject: [PATCH 5/6] use pdftotext if available --- oml/media/pdf.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/oml/media/pdf.py b/oml/media/pdf.py index bc342d0..75313b2 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -237,10 +237,13 @@ def info(pdf): def extract_text(pdf): if sys.platform == 'win32': pdf = get_short_path_name(pdf) + cmd = ['pdftotext', pdf, '-'] if sys.platform == 'darwin': - cmd = ['/usr/bin/mdimport', '-d2', pdf] - else: - cmd = ['pdftotext', pdf, '-'] + pdftotext = ['/usr/local/bin/pdftotext', pdf, '-'] + if os.path.exists(pdftotext[0]): + cmd = pdftotext + else: + cmd = ['/usr/bin/mdimport', '-d2', pdf] if sys.platform == 'win32': startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW @@ -251,7 +254,7 @@ def extract_text(pdf): stdout, stderr = p.communicate() stdout = stdout.decode() stderr = stderr.decode() - if sys.platform == 'darwin': + if sys.platform == 'darwin' and cmd[0] == '/usr/bin/mdimport': if 'kMDItemTextContent' in stderr: stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2] else: From 4e4b853416b5899dbad5e8cb38610cdca7547a45 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 1 Feb 2019 17:53:17 +0530 Subject: [PATCH 6/6] fix cbz --- oml/media/cbz.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/oml/media/cbz.py b/oml/media/cbz.py index b08c972..60f0672 100644 --- a/oml/media/cbz.py +++ b/oml/media/cbz.py @@ -1,9 +1,14 @@ # -*- coding: utf-8 -*- import os import zipfile +import logging import ox + +logger = logging.getLogger(__name__) + + def cover(path): data = None logger.debug('cover %s', path)