diff --git a/oml/media/epub.py b/oml/media/epub.py index 7865a56..16f3431 100644 --- a/oml/media/epub.py +++ b/oml/media/epub.py @@ -12,7 +12,7 @@ from urllib.parse import unquote from PIL import Image import stdnum.isbn -from utils import normalize_isbn, find_isbns +from utils import normalize_isbn, find_isbns, get_language import logging logger = logging.getLogger(__name__) @@ -99,6 +99,8 @@ def info(epub): data['isbn'] = [isbn] if 'date' in data and 'T' in data['date']: data['date'] = data['date'].split('T')[0] + if 'language' in data and isinstance(data['language'], str): + data['language'] = get_language(data['language']) return data def extract_text(path): diff --git a/oml/media/opf.py b/oml/media/opf.py index 9040bce..fa553fd 100644 --- a/oml/media/opf.py +++ b/oml/media/opf.py @@ -6,7 +6,7 @@ import xml.etree.ElementTree as ET import stdnum.isbn -from utils import normalize_isbn +from utils import normalize_isbn, get_language from ox import strip_tags import ox.iso @@ -48,5 +48,5 @@ def info(opf): if 'date' in data and len(data['date']) > 10: data['date'] =data['date'][:10] if 'language' in data: - data['language'] = ox.iso.codeToLang(data['language']) + data['language'] = get_language(data['language']) return data diff --git a/oml/media/pdf.py b/oml/media/pdf.py index c4d8252..7f4fed4 100644 --- a/oml/media/pdf.py +++ b/oml/media/pdf.py @@ -10,12 +10,11 @@ import shutil from glob import glob from datetime import datetime -import ox from PyPDF2 import PdfFileReader import stdnum.isbn import settings -from utils import normalize_isbn, find_isbns +from utils import normalize_isbn, find_isbns, get_language import logging logger = logging.getLogger(__name__) @@ -163,7 +162,7 @@ def info(pdf): if data[key] in ('Unknown',): del data[key] if key == 'language': - data[key] = ox.iso.codeToLang(data[key]) + data[key] = get_language(data[key]) text = extract_text(pdf) data['textsize'] = len(text) if settings.server['extract_text']: diff --git a/oml/utils.py 
def get_language(lang):
    '''
        return the language name for a language code or tag

        Only the primary subtag is looked up, so "en-US" and "en_US" are
        both resolved via "en". If ox.iso.codeToLang has no name for it,
        or lang is not a string, lang is returned unchanged.
    '''
    if not isinstance(lang, str):
        # not every caller checks the type before calling; hand back
        # non-string values untouched instead of raising AttributeError
        return lang
    # accept both "en-US" and locale style "en_US", ignore stray whitespace
    primary = lang.strip().replace('_', '-').split('-')[0]
    return ox.iso.codeToLang(primary) or lang