diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py index 40ff52be..1872b9ce 100644 --- a/pandora/document/fulltext.py +++ b/pandora/document/fulltext.py @@ -19,10 +19,11 @@ def extract_text(pdf, page=None): # split page from pdf and ocr fd, page_pdf = tempfile.mkstemp('.pdf') cmd = ['pdfseparate', '-f', page, '-l', page, pdf, page_pdf] - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) stdout, stderr = p.communicate() text = ocr_image(page_pdf) os.unlink(page_pdf) + os.close(fd) return text else: return ocr_image(pdf)