diff --git a/oml/media/pdf.py b/oml/media/pdf.py index bc342d0..75313b2 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -237,10 +237,13 @@ def info(pdf): def extract_text(pdf): if sys.platform == 'win32': pdf = get_short_path_name(pdf) + cmd = ['pdftotext', pdf, '-'] if sys.platform == 'darwin': - cmd = ['/usr/bin/mdimport', '-d2', pdf] - else: - cmd = ['pdftotext', pdf, '-'] + pdftotext = ['/usr/local/bin/pdftotext', pdf, '-'] + if os.path.exists(pdftotext[0]): + cmd = pdftotext + else: + cmd = ['/usr/bin/mdimport', '-d2', pdf] if sys.platform == 'win32': startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW @@ -251,7 +254,7 @@ def extract_text(pdf): stdout, stderr = p.communicate() stdout = stdout.decode() stderr = stderr.decode() - if sys.platform == 'darwin': + if sys.platform == 'darwin' and cmd[0] == '/usr/bin/mdimport': if 'kMDItemTextContent' in stderr: stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2] else: