openmedialibrary/oml/media/txt.py

39 lines
883 B
Python
Raw Normal View History

2014-05-04 17:26:43 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-09-02 22:32:44 +00:00
2014-05-04 17:26:43 +00:00
import os
from utils import find_isbns
2014-05-19 18:12:02 +00:00
import tempfile
import subprocess
2014-05-04 17:26:43 +00:00
def cover(path):
    """Return the bytes of a cover image generated for the text file at *path*.

    Runs the external ``../reader/txt.js/txt.py`` script with ``python3``,
    passing *path* as ``-i`` and a temporary ``.jpg`` file as ``-o``, then
    returns whatever that temporary file contains.  The temporary file is
    always removed before returning.

    The script's exit status is not checked: if it writes nothing, the
    result is an empty byte string.
    """
    # mkstemp() hands back an already-open OS-level descriptor together with
    # the path.  Only the path is needed (the script opens it by name), so
    # close the descriptor right away instead of leaking it on every call.
    handle, image = tempfile.mkstemp('.jpg')
    os.close(handle)
    try:
        # NOTE(review): the script path is relative, so this presumably
        # relies on the process's working directory -- confirm with callers.
        cmd = ['python3', '../reader/txt.js/txt.py', '-i', path, '-o', image]
        p = subprocess.Popen(cmd, close_fds=True)
        p.wait()
        with open(image, 'rb') as fd:
            data = fd.read()
    finally:
        # Remove the temp file even if spawning or reading back failed.
        os.unlink(image)
    return data
def info(path):
    """Collect basic metadata for the text file at *path*.

    Returns a dict with:
      * ``title``    -- the file's base name without its extension,
      * ``isbn``     -- the value from ``extract_isbn``, only when truthy,
      * ``textsize`` -- the length of the decoded text.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))
    metadata = {'title': stem}

    content = extract_text(path)
    found = extract_isbn(content)
    if found:
        metadata['isbn'] = found

    metadata['textsize'] = len(content)
    return metadata
def extract_text(path):
    """Read the file at *path* and return its contents decoded as UTF-8.

    The file is read in binary mode, so line endings are left untouched.
    Byte sequences that are not valid UTF-8 are replaced rather than
    raising an error.
    """
    with open(path, 'rb') as source:
        raw = source.read()
    return raw.decode('utf-8', errors='replace')
def extract_isbn(text):
    """Return the first entry ``find_isbns`` yields for *text*, or None.

    None is returned when ``find_isbns`` gives back an empty/falsy result.
    """
    candidates = find_isbns(text)
    return candidates[0] if candidates else None