import text not html
This commit is contained in:
parent
97006b0b3e
commit
84c15c690a
3 changed files with 15 additions and 1 deletions
|
@ -13,6 +13,8 @@ from . import worldcat
|
||||||
from . import google
|
from . import google
|
||||||
from . import duckduckgo
|
from . import duckduckgo
|
||||||
|
|
||||||
|
from .utils import decode_html_data
|
||||||
|
|
||||||
from oml import settings
|
from oml import settings
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -95,6 +97,7 @@ def lookup(key, value):
|
||||||
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
|
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
|
||||||
if key in data and not isinstance(data[key], list):
|
if key in data and not isinstance(data[key], list):
|
||||||
data[key] = [data[key]]
|
data[key] = [data[key]]
|
||||||
|
data = decode_html_data(data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def isvalid_id(key, value):
|
def isvalid_id(key, value):
|
||||||
|
|
|
@ -6,7 +6,7 @@ from ox.cache import get_json, store
|
||||||
import ox.web.google
|
import ox.web.google
|
||||||
import stdnum.isbn
|
import stdnum.isbn
|
||||||
|
|
||||||
from .utils import find_isbns, get_language
|
from .utils import find_isbns, get_language, decode_html_data
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -84,5 +84,6 @@ def info(key, value):
|
||||||
data['publisher'] = [data['publisher']]
|
data['publisher'] = [data['publisher']]
|
||||||
if 'language' in _data:
|
if 'language' in _data:
|
||||||
data['language'] = [get_language(_data['language'])]
|
data['language'] = [get_language(_data['language'])]
|
||||||
|
data = decode_html_data(data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -24,3 +24,13 @@ def find_isbns(text):
|
||||||
|
|
||||||
def get_language(lang):
    """Return the language name for a language code such as ``en-US``.

    Only the part before the first ``-`` is looked up via
    ``ox.iso.codeToLang``; when that lookup yields a falsy result the
    original *lang* value is returned unchanged.
    """
    primary_code = lang.split('-')[0]
    language_name = ox.iso.codeToLang(primary_code)
    if language_name:
        return language_name
    return lang
|
||||||
|
|
||||||
|
def decode_html_data(data):
    """Recursively decode HTML in every string contained in *data*.

    Dicts and lists are walked recursively and rebuilt as new containers,
    so the object passed in by the caller is never modified. Strings are
    passed through ``ox.decode_html``. Any other value (numbers, ``None``,
    booleans, ...) is returned unchanged.

    Returns the decoded structure; callers must use the return value.
    """
    if isinstance(data, dict):
        # Build a fresh dict instead of overwriting values in place, so the
        # dict and list branches behave the same way towards the input.
        return {key: decode_html_data(value) for key, value in data.items()}
    if isinstance(data, list):
        return [decode_html_data(value) for value in data]
    if isinstance(data, str):
        return ox.decode_html(data)
    return data
|
||||||
|
|
Loading…
Reference in a new issue