# -*- coding: utf-8 -*- import xml.etree.ElementTree as ET from utils import get_language, to_isbn13 from ox import strip_tags import logging logger = logging.getLogger(__name__) def info(opf): data = {} try: with open(opf, 'rb') as fd: opf = ET.fromstring(fd.read().decode()) except: logger.debug('failed to load opf %s', opf, exc_info=True) return data ns = '{http://www.idpf.org/2007/opf}' metadata = opf.findall(ns + 'metadata')[0] for e in metadata.getchildren(): if e.text: key = e.tag.split('}')[-1] key = { 'creator': 'author', }.get(key, key) value = e.text if key == 'identifier': isbn = to_isbn13(value) if isbn: data['isbn'] = isbn ''' if e.attrib.get(ns + 'scheme') == 'AMAZON': if not 'asin' in data: data['asin'] = [value] else: data['asin'].append(value) ''' else: data[key] = strip_tags(e.text) #YYY-MM-DD if 'date' in data and len(data['date']) > 10: data['date'] =data['date'][:10] if 'language' in data: data['language'] = get_language(data['language']) return data