Don't take PDF metadata if title starts with Microsoft Word
This commit is contained in:
parent
6a91ac9465
commit
a96b55e006
1 changed file with 4 additions and 0 deletions
|
@ -220,6 +220,10 @@ def info(pdf):
|
||||||
data['author'] = [ox.normalize_name(data['author'])]
|
data['author'] = [ox.normalize_name(data['author'])]
|
||||||
if 'description' in data:
|
if 'description' in data:
|
||||||
data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip()
|
data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip()
|
||||||
|
if data.get('title', '').startswith('Microsoft Word'):
|
||||||
|
for key in ('title', 'author', 'producer', 'creator'):
|
||||||
|
if key in data:
|
||||||
|
del data[key]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
Loading…
Reference in a new issue