add meta.extract_text
This commit is contained in:
parent
67d75f7154
commit
f43fc6a172
2 changed files with 30 additions and 0 deletions
|
|
@ -123,3 +123,17 @@ def metadata(f, from_=None):
|
|||
data[key] = [data[key]] if data[key] else []
|
||||
return data
|
||||
|
||||
def extract_text(path):
    """Return the plain text of the document at *path*, or '' on failure.

    The extractor is selected by the file extension (case-insensitive):
    ``epub``/``kepub`` use ``epub.extract_text``, ``pdf`` uses
    ``pdf.extract_text`` and ``txt`` uses ``txt.extract_text``. A path with
    any other extension, or with no extension at all, yields ''.

    Extraction is best-effort: an exception raised by an extractor is
    logged at debug level (with traceback) and '' is returned instead.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import os.path

    # splitext only inspects the final path component, so a dot in a
    # directory name or an extension-less file called "pdf" is not mistaken
    # for an extension (path.split('.')[-1] would do exactly that).
    ext = os.path.splitext(path)[1][1:].lower()
    text = ''
    try:
        if ext in ('epub', 'kepub'):
            text = epub.extract_text(path)
        elif ext == 'pdf':
            text = pdf.extract_text(path)
        elif ext == 'txt':
            text = txt.extract_text(path)
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        logger.debug('failed to extract text from %s', path, exc_info=1)
        text = ''
    return text
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue