diff --git a/oml/media/pdf.py b/oml/media/pdf.py index b370525..3aa1d17 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -220,6 +220,10 @@ def info(pdf): data['author'] = [ox.normalize_name(data['author'])] if 'description' in data: data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip() + if data.get('title', '').startswith('Microsoft Word'): + for key in ('title', 'author', 'producer', 'creator'): + if key in data: + del data[key] return data '''