Run pdftotext only once on Windows (remove the duplicate unconditional Popen call)

This commit is contained in:
j 2016-02-07 17:11:00 +05:30
parent d799e690b5
commit 9747f27d31

View file

@ -229,18 +229,19 @@ def info(pdf):
''' '''
def extract_text(pdf): def extract_text(pdf):
if sys.platform == 'win32':
pdf = get_short_path_name(pdf)
if sys.platform == 'darwin': if sys.platform == 'darwin':
cmd = ['/usr/bin/mdimport', '-d2', pdf] cmd = ['/usr/bin/mdimport', '-d2', pdf]
else: else:
if sys.platform == 'win32':
pdf = get_short_path_name(pdf)
cmd = ['pdftotext', pdf, '-'] cmd = ['pdftotext', pdf, '-']
if sys.platform == 'win32': if sys.platform == 'win32':
startupinfo = subprocess.STARTUPINFO() startupinfo = subprocess.STARTUPINFO()
startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
startupinfo.wShowWindow = subprocess.SW_HIDE startupinfo.wShowWindow = subprocess.SW_HIDE
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, startupinfo=startupinfo)
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) else:
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
stdout = stdout.decode() stdout = stdout.decode()
stderr = stderr.decode() stderr = stderr.decode()