From 44b4f092d1059e3f5c97b14f39c10a619b1a4e81 Mon Sep 17 00:00:00 2001
From: j
Date: Sun, 1 Dec 2019 16:17:58 +0100
Subject: [PATCH] only extract fulltext if fulltext key is defined

---
Review note: has_fulltext_key() is invoked as self.has_fulltext_key() in
update_fulltext(), so it must accept `self`; without it every call raises
TypeError. File-backed extraction (pdf/png/jpg) is now guarded by self.file;
the html branch reads self.data and applies only when no file is attached.

 pandora/document/fulltext.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/pandora/document/fulltext.py b/pandora/document/fulltext.py
index 3990b63d..d64cd258 100644
--- a/pandora/document/fulltext.py
+++ b/pandora/document/fulltext.py
@@ -27,10 +27,11 @@ class FulltextMixin:
         return es
 
     def extract_fulltext(self):
-        if self.extension == 'pdf':
-            return extract_text(self.file.path)
-        elif self.extension in ('png', 'jpg'):
-            return ocr_image(self.file.path)
+        if self.file:
+            if self.extension == 'pdf':
+                return extract_text(self.file.path)
+            elif self.extension in ('png', 'jpg'):
+                return ocr_image(self.file.path)
         elif self.extension == 'html':
             return self.data.get('text', '')
         return ''
@@ -38,13 +39,17 @@ class FulltextMixin:
     def delete_fulltext(self):
         res = self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id)
 
+    def has_fulltext_key(self):
+        return bool([k for k in settings.CONFIG['documentKeys'] if k.get('fulltext')])
+
     def update_fulltext(self):
-        text = self.extract_fulltext()
-        if text:
-            doc = {
-                'text': text.lower()
-            }
-            res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc)
+        if self.has_fulltext_key():
+            text = self.extract_fulltext()
+            if text:
+                doc = {
+                    'text': text.lower()
+                }
+                res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc)
 
     @classmethod
     def find_fulltext(cls, query):