fix text extraction on osx
This commit is contained in:
parent
7502b122a1
commit
c961aa5c64
1 changed file with 2 additions and 0 deletions
|
@@ -170,6 +170,8 @@ def extract_text(pdf):
|
||||||
cmd = ['pdftotext', pdf, '-']
|
cmd = ['pdftotext', pdf, '-']
|
||||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
|
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
|
stdout = stdout.decode()
|
||||||
|
stderr = stderr.decode()
|
||||||
if sys.platform == 'darwin':
|
if sys.platform == 'darwin':
|
||||||
if 'kMDItemTextContent' in stderr:
|
if 'kMDItemTextContent' in stderr:
|
||||||
stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]
|
stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]
|
||||||
|
|
Loading…
Reference in a new issue