Fix text extraction on OS X

This commit is contained in:
j 2014-09-30 22:30:09 +02:00
parent 7502b122a1
commit c961aa5c64

View file

@@ -170,6 +170,8 @@ def extract_text(pdf):
cmd = ['pdftotext', pdf, '-'] cmd = ['pdftotext', pdf, '-']
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
stdout, stderr = p.communicate() stdout, stderr = p.communicate()
stdout = stdout.decode()
stderr = stderr.decode()
if sys.platform == 'darwin': if sys.platform == 'darwin':
if 'kMDItemTextContent' in stderr: if 'kMDItemTextContent' in stderr:
stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2] stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]