run pdftotext only once
This commit is contained in:
parent
d799e690b5
commit
9747f27d31
1 changed file with 4 additions and 3 deletions
|
@ -229,18 +229,19 @@ def info(pdf):
|
||||||
|
|
||||||
'''
|
'''
|
||||||
def extract_text(pdf):
|
def extract_text(pdf):
|
||||||
|
if sys.platform == 'win32':
|
||||||
|
pdf = get_short_path_name(pdf)
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
cmd = ['/usr/bin/mdimport', '-d2', pdf]
|
cmd = ['/usr/bin/mdimport', '-d2', pdf]
|
||||||
else:
|
else:
|
||||||
if sys.platform == 'win32':
|
|
||||||
pdf = get_short_path_name(pdf)
|
|
||||||
cmd = ['pdftotext', pdf, '-']
|
cmd = ['pdftotext', pdf, '-']
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
startupinfo = subprocess.STARTUPINFO()
|
startupinfo = subprocess.STARTUPINFO()
|
||||||
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
|
||||||
startupinfo.wShowWindow = subprocess.SW_HIDE
|
startupinfo.wShowWindow = subprocess.SW_HIDE
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo)
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo)
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
else:
|
||||||
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
stdout = stdout.decode()
|
stdout = stdout.decode()
|
||||||
stderr = stderr.decode()
|
stderr = stderr.decode()
|
||||||
|
|
Loading…
Reference in a new issue