use pdftotext if available
This commit is contained in:
parent
debe3823e2
commit
c38d3a8b35
1 changed file with 7 additions and 4 deletions
|
@@ -237,10 +237,13 @@ def info(pdf):
|
|||
def extract_text(pdf):
|
||||
if sys.platform == 'win32':
|
||||
pdf = get_short_path_name(pdf)
|
||||
cmd = ['pdftotext', pdf, '-']
|
||||
if sys.platform == 'darwin':
|
||||
cmd = ['/usr/bin/mdimport', '-d2', pdf]
|
||||
else:
|
||||
cmd = ['pdftotext', pdf, '-']
|
||||
pdftotext = ['/usr/local/bin/pdftotext', pdf, '-']
|
||||
if os.path.exists(pdftotext[0]):
|
||||
cmd = pdftotext
|
||||
else:
|
||||
cmd = ['/usr/bin/mdimport', '-d2', pdf]
|
||||
if sys.platform == 'win32':
|
||||
startupinfo = subprocess.STARTUPINFO()
|
||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||
|
@@ -251,7 +254,7 @@ def extract_text(pdf):
|
|||
stdout, stderr = p.communicate()
|
||||
stdout = stdout.decode()
|
||||
stderr = stderr.decode()
|
||||
if sys.platform == 'darwin':
|
||||
if sys.platform == 'darwin' and cmd[0] == '/usr/bin/mdimport':
|
||||
if 'kMDItemTextContent' in stderr:
|
||||
stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]
|
||||
else:
|
||||
|
|
Loading…
Reference in a new issue