From c961aa5c648c7f682ebcfe46740d1e2d31d597a4 Mon Sep 17 00:00:00 2001 From: j Date: Tue, 30 Sep 2014 22:30:09 +0200 Subject: [PATCH] fix text extraction on osx --- oml/media/pdf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/oml/media/pdf.py b/oml/media/pdf.py index 5790bac..a6a290c 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -170,6 +170,8 @@ def extract_text(pdf): cmd = ['pdftotext', pdf, '-'] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) stdout, stderr = p.communicate() + stdout = stdout.decode() + stderr = stderr.decode() if sys.platform == 'darwin': if 'kMDItemTextContent' in stderr: stdout = stderr.split('kMDItemTextContent = "')[-1].split('\n')[0][:-2]