cleanup pdf
This commit is contained in:
parent
935f8d7f2b
commit
c5afc46af1
1 changed file with 8 additions and 9 deletions
|
@@ -115,10 +115,7 @@ def info(pdf):
|
|||
value = value[0]
|
||||
if isinstance(value, bytes):
|
||||
value = value.decode('utf-16')
|
||||
if value in ('Unknown',):
|
||||
value = None
|
||||
if value:
|
||||
data[key[1:].lower()] = value
|
||||
data[key[1:].lower()] = value
|
||||
except:
|
||||
pass
|
||||
|
||||
|
@@ -134,15 +131,12 @@ def info(pdf):
|
|||
value = [v.strftime('%Y-%m-%d') if isinstance(v, datetime) else v for v in value]
|
||||
if len(value) == 1:
|
||||
value = value[0]
|
||||
if value in ('Unknown',):
|
||||
value = None
|
||||
_key = key[3:]
|
||||
if value and _key not in data:
|
||||
if _key == 'language':
|
||||
value = ox.iso.codeToLang(value)
|
||||
data[_key] = value
|
||||
except:
|
||||
logger.debug('FAILED TO PARSE %s', pdf, exc_info=1)
|
||||
|
||||
'''
|
||||
cmd = ['pdfinfo', pdf]
|
||||
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
|
||||
|
@@ -164,7 +158,12 @@ def info(pdf):
|
|||
for key, value in data.items():
|
||||
if isinstance(value, dict):
|
||||
value = ' '.join(list(value.values()))
|
||||
data[key] = value
|
||||
data[key] = value.strip()
|
||||
for key in list(data):
|
||||
if data[key] in ('Unknown',):
|
||||
del data[key]
|
||||
if key == 'language':
|
||||
data[key] = ox.iso.codeToLang(data[key])
|
||||
text = extract_text(pdf)
|
||||
data['textsize'] = len(text)
|
||||
if settings.server['extract_text']:
|
||||
|
|
Loading…
Reference in a new issue