Don't take PDF metadata if title starts with Microsoft Word

This commit is contained in:
j 2016-02-14 20:26:17 +05:30
parent 6a91ac9465
commit a96b55e006

View file

@@ -220,6 +220,10 @@ def info(pdf):
data['author'] = [ox.normalize_name(data['author'])] data['author'] = [ox.normalize_name(data['author'])]
if 'description' in data: if 'description' in data:
data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip() data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip()
if data.get('title', '').startswith('Microsoft Word'):
for key in ('title', 'author', 'producer', 'creator'):
if key in data:
del data[key]
return data return data
''' '''