openmedialibrary/oml/media/__init__.py

56 lines
1.3 KiB
Python
Raw Normal View History

2014-05-12 23:43:27 +00:00
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import base64
import hashlib
import os
import ox
2014-05-04 17:26:43 +00:00
import pdf
import epub
import txt
2014-05-12 23:43:27 +00:00
def get_id(f=None, data=None):
    """Return the base32-encoded SHA-1 identifier of a media item.

    If *data* is truthy, its SHA-1 digest is computed directly. Otherwise
    (including when *data* is empty) the digest is obtained from
    ``ox.sha1sum(f)``, whose hex result is converted back to raw bytes
    before base32 encoding.
    """
    if not data:
        # No usable in-memory payload: fall back to hashing via the path f.
        hex_digest = ox.sha1sum(f)
        return base64.b32encode(hex_digest.decode('hex'))
    raw_digest = hashlib.sha1(data).digest()
    return base64.b32encode(raw_digest)
2014-05-04 17:26:43 +00:00
def metadata(f):
    """Collect basic metadata for the media file at path *f*.

    The returned dict always contains:

    - ``'extension'``: the lower-cased text after the last ``.`` in *f*
    - ``'size'``: the file size in bytes, from ``os.stat``
    - ``'title'``: the reader-supplied title, or the file's base name
      without extension when the reader supplied none

    For ``pdf``, ``epub`` and ``txt`` files the matching reader module's
    ``info(f)`` result is consulted, and any truthy values it reports for
    ``title``, ``author``, ``date``, ``publisher``, ``isbn``, ``textsize``
    and ``pages`` are copied over. Byte strings are decoded as UTF-8;
    values that fail to decode are dropped. A string ``author`` is wrapped
    in a list, and when an ``isbn`` is present ``'primaryid'`` is set to
    ``['isbn', <first isbn>]``.

    Files with any other extension yield only the always-present keys.

    Raises ``OSError`` if *f* cannot be stat'ed.
    """
    # Lower-case so that e.g. 'Book.PDF' is handled like 'book.pdf'.
    ext = f.split('.')[-1].lower()
    data = {
        'extension': ext,
        'size': os.stat(f).st_size,
    }

    # Start from an empty result so unsupported extensions fall through to
    # the filename-based defaults instead of hitting an unbound name.
    info = {}
    if ext == 'pdf':
        info = pdf.info(f)
    elif ext == 'epub':
        info = epub.info(f)
    elif ext == 'txt':
        info = txt.info(f)

    for key in (
        'title', 'author', 'date', 'publisher', 'isbn',
        'textsize', 'pages'
    ):
        if key in info:
            value = info[key]
            # On Python 2 ``bytes`` is ``str``, so this matches raw strings.
            if isinstance(value, bytes):
                try:
                    value = value.decode('utf-8')
                except UnicodeDecodeError:
                    # Undecodable text is treated as missing.
                    value = None
            if value:
                # Store the decoded value, not the raw bytes from the reader.
                data[key] = value

    if 'isbn' in data:
        # NOTE(review): assumes 'isbn' is a non-empty sequence of ISBN
        # strings; a plain string would yield its first character - confirm
        # against the reader modules.
        data['primaryid'] = ['isbn', data['isbn'][0]]
    if 'title' not in data:
        data['title'] = os.path.splitext(os.path.basename(f))[0]
    if 'author' in data and isinstance(data['author'], basestring):
        data['author'] = [data['author']]
    return data