From d722ae004bcdea7f5e7748794e4bf574be448cf3 Mon Sep 17 00:00:00 2001 From: j Date: Sat, 15 Nov 2014 00:57:49 +0000 Subject: [PATCH] handle utf-16 pdf info --- oml/item/scan.py | 1 + oml/media/pdf.py | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/oml/item/scan.py b/oml/item/scan.py index 32bbc6b..69fa406 100644 --- a/oml/item/scan.py +++ b/oml/item/scan.py @@ -42,6 +42,7 @@ def add_file(id, f, prefix, from_=None): user = state.user() path = f[len(prefix):] data = media.metadata(f, from_) + print(path) file = File.get_or_create(id, data, path) item = file.item if 'primaryid' in file.info: diff --git a/oml/media/pdf.py b/oml/media/pdf.py index deed8e2..a49522b 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -107,7 +107,13 @@ def info(pdf): if info: for key in info: if info[key]: - data[key[1:].lower()] = info[key] + try: + if isinstance(info[key], bytes): + info[key] = info[key].decode('utf-16') + data[key[1:].lower()] = info[key] + except: + pass + xmp = pdfreader.getXmpMetadata() if xmp: for key in dir(xmp): @@ -122,7 +128,6 @@ def info(pdf): data[_key] = value except: logger.debug('FAILED TO PARSE %s', pdf, exc_info=1) - ''' cmd = ['pdfinfo', pdf] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)