openmedialibrary/oml/media/opf.py

50 lines
1.4 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
2014-09-02 22:32:44 +00:00
import xml.etree.ElementTree as ET
from utils import get_language, to_isbn13
from ox import strip_tags
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def info(opf):
    """Extract book metadata from an OPF package file.

    Each direct child of the package's ``<metadata>`` element that has
    non-empty text becomes one entry in the result, keyed by the tag name
    with its XML namespace stripped. Special cases:

    * ``creator`` is stored under the key ``author``.
    * ``identifier`` values are passed through ``to_isbn13``; a truthy
      result is stored under ``isbn``, anything else is dropped.
    * every other value is passed through ``strip_tags``.
    * ``date`` is truncated to its first 10 characters.
    * ``language`` is passed through ``get_language``.

    When a tag occurs more than once, the last occurrence wins.

    Args:
        opf: Path of the OPF file to read.

    Returns:
        A dict of metadata. It is empty when the file cannot be read or
        parsed, or when it contains no ``<metadata>`` element.
    """
    data = {}
    try:
        with open(opf, 'rb') as fd:
            # Keep the parsed tree in its own name so ``opf`` still holds
            # the path when it is logged below.
            root = ET.fromstring(fd.read().decode())
    except Exception:
        # Best effort: an unreadable or malformed file yields no metadata.
        logger.debug('failed to load opf %s', opf, exc_info=True)
        return data
    ns = '{http://www.idpf.org/2007/opf}'
    metadata = root.find(ns + 'metadata')
    if metadata is None:
        logger.debug('no metadata element in opf %s', opf)
        return data
    # Element.getchildren() was removed in Python 3.9; iterating the
    # element yields the same direct children.
    for e in metadata:
        if not e.text:
            continue
        key = e.tag.split('}')[-1]
        key = {
            'creator': 'author',
        }.get(key, key)
        if key == 'identifier':
            isbn = to_isbn13(e.text)
            if isbn:
                data['isbn'] = isbn
            # NOTE: collecting ASINs from identifiers with scheme "AMAZON"
            # was disabled here; only ISBNs are extracted.
        else:
            data[key] = strip_tags(e.text)
    # YYYY-MM-DD
    if 'date' in data and len(data['date']) > 10:
        data['date'] = data['date'][:10]
    if 'language' in data:
        data['language'] = get_language(data['language'])
    return data