normalize language
This commit is contained in:
parent
c5afc46af1
commit
f8c09226de
4 changed files with 10 additions and 6 deletions
|
@ -12,7 +12,7 @@ from urllib.parse import unquote
|
|||
from PIL import Image
|
||||
import stdnum.isbn
|
||||
|
||||
from utils import normalize_isbn, find_isbns
|
||||
from utils import normalize_isbn, find_isbns, get_language
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -99,6 +99,8 @@ def info(epub):
|
|||
data['isbn'] = [isbn]
|
||||
if 'date' in data and 'T' in data['date']:
|
||||
data['date'] = data['date'].split('T')[0]
|
||||
if 'language' in data and isinstance(data['language'], str):
|
||||
data['language'] = get_language(data['language'])
|
||||
return data
|
||||
|
||||
def extract_text(path):
|
||||
|
|
|
@ -6,7 +6,7 @@ import xml.etree.ElementTree as ET
|
|||
|
||||
import stdnum.isbn
|
||||
|
||||
from utils import normalize_isbn
|
||||
from utils import normalize_isbn, get_language
|
||||
from ox import strip_tags
|
||||
import ox.iso
|
||||
|
||||
|
@ -48,5 +48,5 @@ def info(opf):
|
|||
if 'date' in data and len(data['date']) > 10:
|
||||
data['date'] =data['date'][:10]
|
||||
if 'language' in data:
|
||||
data['language'] = ox.iso.codeToLang(data['language'])
|
||||
data['language'] = get_language(data['language'])
|
||||
return data
|
||||
|
|
|
@ -10,12 +10,11 @@ import shutil
|
|||
from glob import glob
|
||||
from datetime import datetime
|
||||
|
||||
import ox
|
||||
from PyPDF2 import PdfFileReader
|
||||
import stdnum.isbn
|
||||
|
||||
import settings
|
||||
from utils import normalize_isbn, find_isbns
|
||||
from utils import normalize_isbn, find_isbns, get_language
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -163,7 +162,7 @@ def info(pdf):
|
|||
if data[key] in ('Unknown',):
|
||||
del data[key]
|
||||
if key == 'language':
|
||||
data[key] = ox.iso.codeToLang(data[key])
|
||||
data[key] = get_language(data[key])
|
||||
text = extract_text(pdf)
|
||||
data['textsize'] = len(text)
|
||||
if settings.server['extract_text']:
|
||||
|
|
|
@ -121,6 +121,9 @@ def get_position_by_id(list, key):
|
|||
return i
|
||||
return -1
|
||||
|
||||
def get_language(lang):
    '''
    normalize a language code or tag to a language name

    Only the primary subtag is passed to ox.iso.codeToLang, so a regional
    tag such as 'en-US' or 'en_US' is looked up as 'en'.
    If the lookup returns nothing, the original value is returned unchanged.
    Empty or non-string values are returned unchanged as well, so callers
    do not have to type-check metadata before normalizing it.
    '''
    if not lang or not isinstance(lang, str):
        return lang
    # accept both the hyphen ('en-US') and underscore ('en_US') separators
    primary = lang.replace('_', '-').split('-')[0]
    return ox.iso.codeToLang(primary) or lang
|
||||
|
||||
def valid(key, value, sig):
|
||||
'''
|
||||
validate that value was signed by key
|
||||
|
|
Loading…
Reference in a new issue