diff --git a/pandora/archive/extract.py b/pandora/archive/extract.py index 0b27ad02..f206de24 100644 --- a/pandora/archive/extract.py +++ b/pandora/archive/extract.py @@ -642,7 +642,5 @@ def chop(video, start, end, subtitles=None): p.wait() f = open(choped_video, 'rb') os.unlink(choped_video) - if subtitles_f and os.path.exists(subtitles_f): - os.unlink(subtitles_f) os.rmdir(tmp) return f diff --git a/pandora/document/utils.py b/pandora/document/utils.py index 3df53197..ee28ff53 100644 --- a/pandora/document/utils.py +++ b/pandora/document/utils.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import os import re import subprocess @@ -23,17 +22,11 @@ def pdfinfo(pdf): return data def extract_pdfpage(pdf, image, page): - page = str(page) - cmd = [ - 'pdftocairo', - '-jpeg', - '-f', page, '-l', page, - '-singlefile', - '-scale-to', '2048', - pdf, - os.path.splitext(image)[0] - ] - subprocess.call(cmd, stdout=open('/dev/null', 'wb')) + page -= 1 + cmd = ['convert', '%s[%d]' % (pdf, page), + '-background', 'white', '-flatten', '-resize', '1024x1024', image] + p = subprocess.Popen(cmd, close_fds=True) + p.wait() return image def get_documents(text):