only extract fulltext if fulltext key is defined

This commit is contained in:
j 2019-12-01 16:17:58 +01:00
parent 96912f14b0
commit 44b4f092d1
1 changed files with 15 additions and 10 deletions

View File

@ -27,10 +27,11 @@ class FulltextMixin:
return es return es
def extract_fulltext(self):
    """Return the text to index for this document, or '' if there is none.

    When ``self.file`` is set, the text comes from the file on disk:
    ``extract_text`` is called for ``pdf`` and ``ocr_image`` for
    ``png``/``jpg``, each with ``self.file.path``.  When no file is set
    and the extension is ``html``, the text is read from
    ``self.data['text']`` (defaulting to '').  Every other combination
    yields the empty string.
    """
    if self.file:
        # File-backed formats are only read when a file is attached.
        if self.extension == 'pdf':
            return extract_text(self.file.path)
        if self.extension in ('png', 'jpg'):
            return ocr_image(self.file.path)
    elif self.extension == 'html':
        # This branch is only reached when no file is attached.
        return self.data.get('text', '')
    return ''
@ -38,13 +39,17 @@ class FulltextMixin:
def delete_fulltext(self):
    """Delete this document's entry from the fulltext index.

    Issues a ``delete`` on the client returned by ``self.elasticsearch()``
    for ``self.id`` in ``self._ES_INDEX``.  The client's response is
    discarded and nothing is returned.
    """
    self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id)
@staticmethod
def has_fulltext_key():
    """Return True if any configured document key enables fulltext.

    Scans ``settings.CONFIG['documentKeys']`` and reports whether at least
    one entry has a truthy ``'fulltext'`` value.

    Declared as a static method because it reads no instance state; this
    keeps the ``self.has_fulltext_key()`` call in ``update_fulltext``
    working (a plain zero-argument method would raise ``TypeError`` when
    invoked on an instance).
    """
    return any(key.get('fulltext') for key in settings.CONFIG['documentKeys'])
def update_fulltext(self):
    """Extract this document's text and store it in the fulltext index.

    Does nothing when ``self.has_fulltext_key()`` is false, so no
    extraction work is done if fulltext is not configured, or when
    ``self.extract_fulltext()`` returns no text.  Otherwise the
    lower-cased text is indexed under ``self.id`` in ``self._ES_INDEX``.
    The client's response is discarded and nothing is returned.
    """
    if not self.has_fulltext_key():
        return
    text = self.extract_fulltext()
    if not text:
        return
    doc = {
        'text': text.lower(),
    }
    self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc)
@classmethod @classmethod
def find_fulltext(cls, query): def find_fulltext(cls, query):