fix cbz

use pdftotext if available
only try to download if we have a url
2019-02-01 17:53:17 +05:30 · 2019-02-01 17:38:31 +05:30 · 2019-02-01 17:38:31 +05:30 · 2019-02-01 17:38:31 +05:30 · 2019-02-01 17:38:31 +05:30 · 2019-02-01 17:38:31 +05:30
6 changed files with 27 additions and 14 deletions
--- a/2
+++ b/2
@@ -56,7 +56,7 @@ else
            ARCH="_armv7l"
        fi
        PLATFORM="linux$ARCH"
-        if [ -e $BASE/platform_${PLATFORM}/lib/libunrar.so ]; then
+        if [ -e "$BASE/platform_${PLATFORM}/lib/libunrar.so" ]; then
            export UNRAR_LIB_PATH="$BASE/platform_${PLATFORM}/lib/libunrar.so"
        fi
    fi
--- a/oml/item/models.py
+++ b/oml/item/models.py
@@ -843,15 +843,17 @@ def download_cover(id):
            url = i.meta.get('cover')
        else:
            url = None
            cover = None
    logger.debug('download cover %s %s', id, url)
    ratio = None
-    try:
+    if url:
-        cover = ox.net.read_url(url)
+        try:
-        ratio = get_ratio(cover)
+            cover = ox.net.read_url(url)
-    except:
+            ratio = get_ratio(cover)
-        logger.debug('unable to read cover url %s', url)
+        except:
-        cover = None
+            logger.debug('unable to read cover url %s', url)
            cover = None
    with db.session():
        i = Item.get(id, for_update=True)
        if i:
--- a/oml/media/cbz.py
+++ b/oml/media/cbz.py
@@ -1,9 +1,14 @@
 # -*- coding: utf-8 -*-
 import os
 import zipfile
 import logging
 import ox
 logger = logging.getLogger(__name__)
 def cover(path):
    data = None
    logger.debug('cover %s', path)
--- a/oml/media/epub.py
+++ b/oml/media/epub.py
@@ -153,7 +153,7 @@ def info(epub):
                            contents.append(txt)
                if contents:
                    data['tableofcontents'] = '\n'.join(contents).strip()
-        if not 'tableofcontents' in data:
+        if 'tableofcontents' not in data:
            guide = info.find('{http://www.idpf.org/2007/opf}guide')
            if guide:
                for ref in guide.findall('{http://www.idpf.org/2007/opf}reference'):
@@ -169,7 +169,7 @@ def info(epub):
        data['description'] = strip_tags(decode_html(data['description']))
    text = extract_text(epub)
    data['textsize'] = len(text)
-    if not 'isbn' in data:
+    if 'isbn' not in data:
        isbn = extract_isbn(text)
        if isbn:
            data['isbn'] = isbn
--- a/oml/media/pdf.py
+++ b/oml/media/pdf.py
@@ -237,10 +237,13 @@ def info(pdf):
 def extract_text(pdf):
    if sys.platform == 'win32':
        pdf = get_short_path_name(pdf)
    cmd = ['pdftotext', pdf, '-']
    if sys.platform == 'darwin':
-        cmd = ['/usr/bin/mdimport', '-d2', pdf]
+        pdftotext = ['/usr/local/bin/pdftotext', pdf, '-']
-    else:
+        if os.path.exists(pdftotext[0]):
-        cmd = ['pdftotext', pdf, '-']
+            cmd = pdftotext
        else:
            cmd = ['/usr/bin/mdimport', '-d2', pdf]
    if sys.platform == 'win32':
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
@@ -251,7 +254,7 @@ def extract_text(pdf):
    stdout, stderr = p.communicate()
    stdout = stdout.decode()
    stderr = stderr.decode()
-    if sys.platform == 'darwin':
+    if sys.platform == 'darwin' and cmd[0] == '/usr/bin/mdimport':
        if 'kMDItemTextContent' in stderr:
            stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]
        else:
--- a/oml/tasks.py
+++ b/oml/tasks.py
@@ -36,7 +36,10 @@ class Tasks(Thread):
    def run(self):
        self.load_tasks()
-        self.queue('scan')
+        if time.mktime(time.gmtime()) - settings.server.get('last_scan', 0) > 24*60*60:
            settings.server['last_scan'] = time.mktime(time.gmtime())
            self.queue('scan')
        import item.scan
        from item.models import sync_metadata, get_preview, get_cover
        from user.models import (
Author	SHA1	Message	Date
j	4e4b853416	fix cbz	2019-02-01 17:53:17 +05:30
j	c38d3a8b35	use pdftotext if available	2019-02-01 17:38:31 +05:30
j	debe3823e2	only try to download if we have a url	2019-02-01 17:38:31 +05:30
j	e623307d19	paths with spaces	2019-02-01 17:38:31 +05:30
j	1421e402e1	PEP8	2019-02-01 17:38:31 +05:30
j	853f6cdc0c	only scan once per day	2019-02-01 17:38:31 +05:30