openmedialibrary/oml/media/txt.py

39 lines
873 B
Python
Raw Normal View History

2014-05-04 17:26:43 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import os
from utils import find_isbns
2014-05-19 18:12:02 +00:00
import tempfile
import subprocess
2014-05-04 17:26:43 +00:00
def cover(path):
    """Render a cover image for the text file at *path* and return its bytes.

    The rendering is delegated to the external ``static/txt.js/txt.py``
    script (run with ``python2``), which is asked to write its output to a
    temporary ``.jpg`` file.  The content of that file is read back and
    returned as raw bytes; the temporary file is always removed afterwards.

    The exit status of the renderer is not inspected: if it fails without
    writing anything, the returned data is simply empty.

    Raises OSError if the renderer cannot be started or the image cannot
    be read.
    """
    handle, image = tempfile.mkstemp('.jpg')
    # mkstemp hands back an already-open OS-level descriptor; only the path
    # is needed here, so close it right away instead of leaking it.
    os.close(handle)
    try:
        cmd = ['python2', 'static/txt.js/txt.py', '-i', path, '-o', image]
        p = subprocess.Popen(cmd, close_fds=True)
        p.wait()
        with open(image, 'rb') as fd:
            data = fd.read()
    finally:
        # Clean up even when spawning or reading failed; the renderer may
        # have removed the file itself, so do not fail on a missing file.
        if os.path.exists(image):
            os.unlink(image)
    return data
def info(path):
    """Build the metadata dictionary for the text file at *path*.

    The result always contains ``title`` (the file name without directory
    and extension) and ``textsize`` (the length of the extracted text).
    When ``extract_isbn`` yields a truthy value it is stored as a
    one-element list under ``isbn``.
    """
    filename = os.path.basename(path)
    title, _extension = os.path.splitext(filename)
    metadata = {'title': title}

    text = extract_text(path)
    isbn = extract_isbn(text)
    if isbn:
        metadata['isbn'] = [isbn]
    metadata['textsize'] = len(text)
    return metadata
def extract_text(path):
    """Return the complete content of the file at *path*.

    The file is opened with the default mode of ``open``, so the result is
    whatever a plain read of the whole file yields.
    """
    with open(path) as source:
        return source.read()
def extract_isbn(text):
    """Return the first entry reported by ``find_isbns`` for *text*.

    Returns None when ``find_isbns`` gives back nothing (an empty or
    otherwise falsy result).
    """
    candidates = find_isbns(text)
    return candidates[0] if candidates else None