Don't take PDF metadata if title starts with Microsoft Word

2016-02-14 20:26:17 +05:30 · 2016-02-14 20:26:17 +05:30 · a96b55e006
commit a96b55e006
parent 6a91ac9465
1 changed file with 4 additions and 0 deletions
--- a/oml/media/pdf.py
+++ b/oml/media/pdf.py
@@ -220,6 +220,10 @@ def info(pdf):
        data['author'] = [ox.normalize_name(data['author'])]
    if 'description' in data:
        data['description'] = ox.strip_tags(ox.decode_html(data['description'])).strip()
+    if data.get('title', '').startswith('Microsoft Word'):
+        for key in ('title', 'author', 'producer', 'creator'):
+            if key in data:
+                del data[key]
    return data

 '''