From a96b55e0065020a176a2d8f027a115c0d7fa2bb4 Mon Sep 17 00:00:00 2001
From: j
Date: Sun, 14 Feb 2016 20:26:17 +0530
Subject: [PATCH] dont take pdf metadata if title starts with Microsoft Word

---
 oml/media/pdf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/oml/media/pdf.py b/oml/media/pdf.py
index b370525..3aa1d17 100644
--- a/oml/media/pdf.py
+++ b/oml/media/pdf.py
@@ -220,6 +220,10 @@ def info(pdf):
         data['author'] = [ox.normalize_name(data['author'])]
     if 'description' in data:
         data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip()
+    if data.get('title', '').startswith('Microsoft Word'):
+        for key in ('title', 'author', 'producer', 'creator'):
+            if key in data:
+                del data[key]
     return data
 
 '''