forked from 0x2620/pandora
only extract fulltext if fulltext key is defined
This commit is contained in:
parent
96912f14b0
commit
44b4f092d1
1 changed file with 15 additions and 10 deletions
|
@@ -27,10 +27,11 @@ class FulltextMixin:
|
||||||
return es
|
return es
|
||||||
|
|
||||||
def extract_fulltext(self):
|
def extract_fulltext(self):
|
||||||
if self.extension == 'pdf':
|
if self.file:
|
||||||
return extract_text(self.file.path)
|
if self.extension == 'pdf':
|
||||||
elif self.extension in ('png', 'jpg'):
|
return extract_text(self.file.path)
|
||||||
return ocr_image(self.file.path)
|
elif self.extension in ('png', 'jpg'):
|
||||||
|
return ocr_image(self.file.path)
|
||||||
elif self.extension == 'html':
|
elif self.extension == 'html':
|
||||||
return self.data.get('text', '')
|
return self.data.get('text', '')
|
||||||
return ''
|
return ''
|
||||||
|
@@ -38,13 +39,17 @@ class FulltextMixin:
|
||||||
def delete_fulltext(self):
|
def delete_fulltext(self):
|
||||||
res = self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id)
|
res = self.elasticsearch().delete(index=self._ES_INDEX, doc_type='document', id=self.id)
|
||||||
|
|
||||||
|
def has_fulltext_key():
|
||||||
|
return bool([k for k in settings.CONFIG['documentKeys'] if k.get('fulltext')])
|
||||||
|
|
||||||
def update_fulltext(self):
|
def update_fulltext(self):
|
||||||
text = self.extract_fulltext()
|
if self.has_fulltext_key():
|
||||||
if text:
|
text = self.extract_fulltext()
|
||||||
doc = {
|
if text:
|
||||||
'text': text.lower()
|
doc = {
|
||||||
}
|
'text': text.lower()
|
||||||
res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc)
|
}
|
||||||
|
res = self.elasticsearch().index(index=self._ES_INDEX, doc_type='document', id=self.id, body=doc)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def find_fulltext(cls, query):
|
def find_fulltext(cls, query):
|
||||||
|
|
Loading…
Reference in a new issue