lots of stuff

This commit is contained in:
j 2014-05-21 02:02:21 +02:00
commit feddea0ccd
24 changed files with 1385 additions and 226 deletions

View file

@@ -46,13 +46,7 @@ def metadata(f):
data[key] = info[key]
if 'isbn' in data:
value = data.pop('isbn')
if len(value) == 10:
data['isbn10'] = value
data['mainid'] = 'isbn10'
else:
data['isbn13'] = value
data['mainid'] = 'isbn13'
data['primaryid'] = ['isbn', data['isbn'][0]]
if not 'title' in data:
data['title'] = os.path.splitext(os.path.basename(f))[0]
if 'author' in data and isinstance(data['author'], basestring):

View file

@@ -21,7 +21,7 @@ def cover(path):
z = zipfile.ZipFile(path)
data = None
for f in z.filelist:
if 'cover' in f.filename and f.filename.split('.')[-1] in ('jpg', 'jpeg', 'png'):
if 'cover' in f.filename.lower() and f.filename.split('.')[-1] in ('jpg', 'jpeg', 'png'):
logger.debug('using %s', f.filename)
data = z.read(f.filename)
break
@@ -31,7 +31,12 @@ def cover(path):
info = ET.fromstring(z.read(opf[0]))
manifest = info.findall('{http://www.idpf.org/2007/opf}manifest')[0]
for e in manifest.getchildren():
if 'html' in e.attrib['media-type']:
if 'image' in e.attrib['media-type']:
filename = e.attrib['href']
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
data = z.read(filename)
break
elif 'html' in e.attrib['media-type']:
filename = e.attrib['href']
filename = os.path.normpath(os.path.join(os.path.dirname(opf[0]), filename))
html = z.read(filename)
@@ -66,7 +71,7 @@ def info(epub):
if key == 'identifier':
value = normalize_isbn(value)
if stdnum.isbn.is_valid(value):
data['isbn'] = value
data['isbn'] = [value]
else:
data[key] = e.text
text = extract_text(epub)
@@ -74,7 +79,7 @@ def info(epub):
if not 'isbn' in data:
isbn = extract_isbn(text)
if isbn:
data['isbn'] = isbn
data['isbn'] = [isbn]
if 'date' in data and 'T' in data['date']:
data['date'] = data['date'].split('T')[0]
return data

View file

@@ -99,7 +99,7 @@ def info(pdf):
if 'identifier' in data:
value = normalize_isbn(data['identifier'])
if stdnum.isbn.is_valid(value):
data['isbn'] = value
data['isbn'] = [value]
del data['identifier']
'''
cmd = ['pdfinfo', pdf]
@@ -120,7 +120,7 @@ def info(pdf):
if not 'isbn' in data:
isbn = extract_isbn(text)
if isbn:
data['isbn'] = isbn
data['isbn'] = [isbn]
return data
'''

View file

@@ -23,7 +23,7 @@ def info(path):
text = extract_text(path)
isbn = extract_isbn(text)
if isbn:
data['isbn'] = isbn
data['isbn'] = [isbn]
data['textsize'] = len(text)
return data