extract detail from pdf
This commit is contained in:
parent
fd34ba305c
commit
5bd561e64f
3 changed files with 75 additions and 5 deletions
|
@ -93,6 +93,24 @@ class EpubHandler(OMLHandler):
|
|||
self.set_header('Content-Type', content_type)
|
||||
self.write(z.read(filename))
|
||||
|
||||
class CropHandler(OMLHandler):
    """Serve a cropped region of one page of an item's file as a JPEG."""

    def get(self, id, page, left, top, right, bottom):
        """Write the cropped page region as ``image/jpeg``, or respond 404.

        All arguments arrive as strings captured from the request URL and
        are forwarded unchanged to ``media.pdf.crop``.

        A 404 is returned when the item or its path is missing, when the
        crop coordinates are not valid integers, or when no image data
        could be produced.
        """
        # Imported at call time, as in the original handler.
        from media.pdf import crop

        data = None
        with db.session():
            item = Item.get(id)
            # NOTE(review): assumes Item.get() yields None for an unknown id
            # and get_path() may yield a falsy value when there is no file
            # -- confirm against the model.
            path = item.get_path() if item else None
            if path:
                try:
                    data = crop(path, page, left, top, right, bottom)
                except ValueError:
                    # The URL pattern allows empty coordinate groups, which
                    # cannot be converted to integers; treat as not found
                    # instead of failing with a server error.
                    data = None

        if not data:
            self.set_status(404)
            return

        self.set_header('Content-Type', 'image/jpeg')
        self.set_header('Content-Length', str(len(data)))
        self.write(data)
|
||||
|
||||
|
||||
def serve_static(handler, path, mimetype, include_body=True, disposition=None):
|
||||
handler.set_header('Content-Type', mimetype)
|
||||
size = os.stat(path).st_size
|
||||
|
|
|
@ -10,6 +10,7 @@ from glob import glob
|
|||
from datetime import datetime
|
||||
|
||||
from PyPDF2 import PdfFileReader
|
||||
from PIL import Image
|
||||
import ox
|
||||
|
||||
import settings
|
||||
|
@ -24,13 +25,13 @@ def cover(pdf):
|
|||
else:
|
||||
return page(pdf, 1)
|
||||
|
||||
def ql_cover(pdf):
|
||||
def ql_cover(pdf, size=1024):
|
||||
tmp = tempfile.mkdtemp()
|
||||
cmd = [
|
||||
'qlmanage',
|
||||
'-t',
|
||||
'-s',
|
||||
'1024',
|
||||
str(size),
|
||||
'-o',
|
||||
tmp,
|
||||
pdf
|
||||
|
@ -48,7 +49,7 @@ def ql_cover(pdf):
|
|||
shutil.rmtree(tmp)
|
||||
return data
|
||||
|
||||
def page(pdf, page):
|
||||
def page(pdf, page, size=1024):
|
||||
tmp = tempfile.mkdtemp()
|
||||
if sys.platform == 'win32':
|
||||
pdf = get_short_path_name(pdf)
|
||||
|
@ -57,7 +58,7 @@ def page(pdf, page):
|
|||
pdf,
|
||||
'-jpeg',
|
||||
'-f', str(page), '-l', str(page),
|
||||
'-scale-to', '1024', '-cropbox',
|
||||
'-scale-to', str(size), '-cropbox',
|
||||
os.path.join(tmp, 'page')
|
||||
]
|
||||
if sys.platform == 'win32':
|
||||
|
@ -79,6 +80,47 @@ def page(pdf, page):
|
|||
shutil.rmtree(tmp)
|
||||
return data
|
||||
|
||||
def crop(pdf, page, left, top, right, bottom):
    """Render one PDF page with ``pdftocairo`` and return a cropped JPEG.

    The page is rendered with ``-scale-to 2048 -cropbox`` into a temporary
    directory, the rendered image is cropped to the box
    ``(left, top, right, bottom)`` and the result is returned as bytes.

    Args:
        pdf: path to the PDF file.
        page: page number to render (passed to ``-f`` / ``-l``).
        left, top, right, bottom: crop box in pixels of the rendered image;
            anything ``int()`` accepts.

    Returns:
        The cropped JPEG data, or ``None`` if ``pdftocairo`` produced no
        output file.

    Raises:
        ValueError: if a crop coordinate cannot be converted to ``int``.
    """
    # Parse the crop box first so invalid coordinates fail before a temp
    # directory is created or an external process is started.
    box = tuple(int(value) for value in (left, top, right, bottom))
    size = 2048
    tmp = tempfile.mkdtemp()
    try:
        if sys.platform == 'win32':
            pdf = get_short_path_name(pdf)
        cmd = [
            'pdftocairo',
            pdf,
            '-jpeg',
            '-f', str(page), '-l', str(page),
            '-scale-to', str(size), '-cropbox',
            os.path.join(tmp, 'page')
        ]
        if sys.platform == 'win32':
            # Keep the child process from opening a console window.
            startupinfo = subprocess.STARTUPINFO()
            startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = subprocess.SW_HIDE
            p = subprocess.Popen(cmd, close_fds=True, startupinfo=startupinfo)
        else:
            p = subprocess.Popen(cmd, close_fds=True)
        p.wait()

        rendered = glob('%s/*' % tmp)
        if not rendered:
            logger.debug('pdftocairo %s %s', pdf, ' '.join(cmd))
            return None

        image = rendered[0]
        # Overwrite the rendered page with its cropped version, then read
        # the encoded bytes back from disk.
        Image.open(image).crop(box).save(image)
        with open(image, 'rb') as fd:
            return fd.read()
    finally:
        # Always remove the temp directory, including on errors.
        shutil.rmtree(tmp)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
'''
|
||||
def page(pdf, page):
|
||||
image = tempfile.mkstemp('.jpg')[1]
|
||||
|
@ -281,3 +323,4 @@ def extract_isbn(text):
|
|||
isbns = find_isbns(text)
|
||||
if isbns:
|
||||
return isbns[0]
|
||||
|
||||
|
|
|
@ -7,11 +7,13 @@ import signal
|
|||
import time
|
||||
|
||||
from tornado.ioloop import IOLoop
|
||||
from tornado.web import StaticFileHandler, Application
|
||||
import tornado.web
|
||||
from tornado.web import Application
|
||||
|
||||
from cache import Cache
|
||||
from item.handlers import EpubHandler, ReaderHandler, FileHandler
|
||||
from item.handlers import OMLHandler, UploadHandler
|
||||
from item.handlers import CropHandler
|
||||
from item.icons import IconHandler
|
||||
import db
|
||||
import node.server
|
||||
|
@ -29,6 +31,12 @@ import logging
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class StaticFileHandler(tornado.web.StaticFileHandler):
    """Static file handler that reports ``.mjs`` files as JavaScript."""

    def get_content_type(self):
        """Return the Content-Type for the requested path.

        Request paths ending in ``.mjs`` (ignoring anything after a ``?``)
        get ``application/javascript``; everything else is left to the
        parent class.
        """
        request_path, _, _ = self.request.path.partition('?')
        if not request_path.endswith('.mjs'):
            return super().get_content_type()
        return 'application/javascript'
|
||||
|
||||
class MainHandler(OMLHandler):
|
||||
|
||||
def get(self, path):
|
||||
|
@ -126,6 +134,7 @@ def run():
|
|||
(r'/(.*?)/get/', FileHandler, {
|
||||
'attachment': True
|
||||
}),
|
||||
(r'/(.*)/2048p(\d*),(\d*),(\d*),(\d*),(\d*).jpg', CropHandler),
|
||||
(r'/(.*)/(cover|preview)(\d*).jpg', IconHandler),
|
||||
]
|
||||
handlers = common_handlers + [
|
||||
|
|
Loading…
Reference in a new issue