normalize language
This commit is contained in:
parent
c5afc46af1
commit
f8c09226de
4 changed files with 10 additions and 6 deletions
|
@ -12,7 +12,7 @@ from urllib.parse import unquote
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
from utils import normalize_isbn, find_isbns
|
from utils import normalize_isbn, find_isbns, get_language
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -99,6 +99,8 @@ def info(epub):
|
||||||
data['isbn'] = [isbn]
|
data['isbn'] = [isbn]
|
||||||
if 'date' in data and 'T' in data['date']:
|
if 'date' in data and 'T' in data['date']:
|
||||||
data['date'] = data['date'].split('T')[0]
|
data['date'] = data['date'].split('T')[0]
|
||||||
|
if 'language' in data and isinstance(data['language'], str):
|
||||||
|
data['language'] = get_language(data['language'])
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def extract_text(path):
|
def extract_text(path):
|
||||||
|
|
|
@ -6,7 +6,7 @@ import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
from utils import normalize_isbn
|
from utils import normalize_isbn, get_language
|
||||||
from ox import strip_tags
|
from ox import strip_tags
|
||||||
import ox.iso
|
import ox.iso
|
||||||
|
|
||||||
|
@ -48,5 +48,5 @@ def info(opf):
|
||||||
if 'date' in data and len(data['date']) > 10:
|
if 'date' in data and len(data['date']) > 10:
|
||||||
data['date'] =data['date'][:10]
|
data['date'] =data['date'][:10]
|
||||||
if 'language' in data:
|
if 'language' in data:
|
||||||
data['language'] = ox.iso.codeToLang(data['language'])
|
data['language'] = get_language(data['language'])
|
||||||
return data
|
return data
|
||||||
|
|
|
@ -10,12 +10,11 @@ import shutil
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import ox
|
|
||||||
from PyPDF2 import PdfFileReader
|
from PyPDF2 import PdfFileReader
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
import settings
|
import settings
|
||||||
from utils import normalize_isbn, find_isbns
|
from utils import normalize_isbn, find_isbns, get_language
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -163,7 +162,7 @@ def info(pdf):
|
||||||
if data[key] in ('Unknown',):
|
if data[key] in ('Unknown',):
|
||||||
del data[key]
|
del data[key]
|
||||||
if key == 'language':
|
if key == 'language':
|
||||||
data[key] = ox.iso.codeToLang(data[key])
|
data[key] = get_language(data[key])
|
||||||
text = extract_text(pdf)
|
text = extract_text(pdf)
|
||||||
data['textsize'] = len(text)
|
data['textsize'] = len(text)
|
||||||
if settings.server['extract_text']:
|
if settings.server['extract_text']:
|
||||||
|
|
|
@ -121,6 +121,9 @@ def get_position_by_id(list, key):
|
||||||
return i
|
return i
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
def get_language(lang):
    '''
    normalize a language code or tag to a language name

    Only the primary subtag is looked up, so 'en', 'en-US' and 'en_US'
    are all resolved through ox.iso.codeToLang('en'). Surrounding
    whitespace is ignored for the lookup.

    lang: language value taken from document metadata.

    Returns the name found by ox.iso.codeToLang, or lang unchanged when
    the lookup yields a falsy result, when lang has no primary subtag,
    or when lang is not a string.
    '''
    # Metadata is not guaranteed to be a string (e.g. None or a list);
    # hand such values back untouched instead of failing on .split().
    if not isinstance(lang, str):
        return lang
    # Accept both '-' (language tags) and '_' (locale names) as the
    # separator between the primary subtag and region/script parts.
    code = lang.strip().replace('_', '-').split('-')[0]
    if not code:
        # nothing to look up; keep the original value
        return lang
    # codeToLang is assumed to return a falsy value for unknown codes;
    # in that case fall back to the value we were given.
    return ox.iso.codeToLang(code) or lang
|
||||||
|
|
||||||
def valid(key, value, sig):
|
def valid(key, value, sig):
|
||||||
'''
|
'''
|
||||||
validate that value was signed by key
|
validate that value was signed by key
|
||||||
|
|
Loading…
Reference in a new issue