openmedialibrary/oml/media/txt.py

39 lines
883 B
Python
Raw Normal View History

2014-05-04 19:26:43 +02:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-09-03 00:32:44 +02:00
2014-05-04 19:26:43 +02:00
import os
from utils import find_isbns
2014-05-19 20:12:02 +02:00
import tempfile
import subprocess
2014-05-04 19:26:43 +02:00
def cover(path):
    """Render a cover image for the text file at *path* and return its bytes.

    Rendering is delegated to the external ``txt.py`` script, which is run
    with ``python3`` and told to write its output to a temporary ``.jpg``
    file. That file is read back and deleted before returning.

    The script path is relative, so it is resolved against the current
    working directory of the calling process. The script's exit status is
    not checked; whatever ends up in the temporary file is returned.

    Returns:
        The contents of the temporary output file as ``bytes``.
    """
    # mkstemp hands back an already-open OS-level descriptor together with
    # the path. Only the path is needed, so close the descriptor right away
    # instead of leaking it on every call.
    handle, image = tempfile.mkstemp('.jpg')
    os.close(handle)
    try:
        cmd = ['python3', '../reader/txt.js/txt.py', '-i', path, '-o', image]
        p = subprocess.Popen(cmd, close_fds=True)
        p.wait()
        with open(image, 'rb') as fd:
            data = fd.read()
    finally:
        # Remove the temporary file even when launching the script or
        # reading its output raised.
        os.unlink(image)
    return data
def info(path):
    """Collect basic metadata for the text file at *path*.

    Returns:
        A dict with the keys:

        * ``'title'``    -- the file name without directory and extension.
        * ``'isbn'``     -- the value returned by ``extract_isbn`` for the
          file's text; only present when that value is truthy.
        * ``'textsize'`` -- ``len()`` of the text returned by
          ``extract_text``.
    """
    data = {}
    data['title'] = os.path.splitext(os.path.basename(path))[0]
    text = extract_text(path)
    isbn = extract_isbn(text)
    if isbn:
        data['isbn'] = isbn
    # Recorded for every file, whether or not an ISBN was found.
    data['textsize'] = len(text)
    return data
def extract_text(path):
    """Return the contents of the file at *path* decoded as UTF-8 text.

    The file is read in binary mode, so line endings are kept exactly as
    stored. Bytes that are not valid UTF-8 are replaced with U+FFFD rather
    than raising ``UnicodeDecodeError``.
    """
    with open(path, 'rb') as fd:
        raw = fd.read()
    # Decode after the file is closed; errors='replace' keeps undecodable
    # input from aborting the import of a file.
    return raw.decode('utf-8', errors='replace')
def extract_isbn(text):
    """Return the first entry ``find_isbns`` reports for *text*.

    Returns ``None`` when ``find_isbns`` yields nothing (an empty or
    otherwise falsy result).
    """
    isbns = find_isbns(text)
    if isbns:
        return isbns[0]
    return None