import text not html
This commit is contained in:
parent
97006b0b3e
commit
84c15c690a
3 changed files with 15 additions and 1 deletions
|
@ -13,6 +13,8 @@ from . import worldcat
|
|||
from . import google
|
||||
from . import duckduckgo
|
||||
|
||||
from .utils import decode_html_data
|
||||
|
||||
from oml import settings
|
||||
|
||||
import logging
|
||||
|
@ -95,6 +97,7 @@ def lookup(key, value):
|
|||
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
|
||||
if key in data and not isinstance(data[key], list):
|
||||
data[key] = [data[key]]
|
||||
data = decode_html_data(data)
|
||||
return data
|
||||
|
||||
def isvalid_id(key, value):
|
||||
|
|
|
@ -6,7 +6,7 @@ from ox.cache import get_json, store
|
|||
import ox.web.google
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import find_isbns, get_language
|
||||
from .utils import find_isbns, get_language, decode_html_data
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -84,5 +84,6 @@ def info(key, value):
|
|||
data['publisher'] = [data['publisher']]
|
||||
if 'language' in _data:
|
||||
data['language'] = [get_language(_data['language'])]
|
||||
data = decode_html_data(data)
|
||||
return data
|
||||
|
||||
|
|
|
@ -24,3 +24,13 @@ def find_isbns(text):
|
|||
|
||||
def get_language(lang):
    """Map a language tag to the name `ox.iso.codeToLang` gives for it.

    Only the part of *lang* before the first ``-`` is looked up.  When the
    lookup yields a falsy result, the original *lang* string is returned
    unchanged.
    """
    primary_code = lang.split('-')[0]
    language_name = ox.iso.codeToLang(primary_code)
    if language_name:
        return language_name
    return lang
|
||||
|
||||
def decode_html_data(data):
    """Recursively pass every string inside *data* through `ox.decode_html`.

    - ``str``: returns the result of ``ox.decode_html(data)``.
    - ``list``: returns a new list whose items have been processed.
    - ``dict``: values are processed and written back into the same dict
      (keys are left untouched); the same dict object is returned.
    - anything else: returned unchanged.
    """
    if isinstance(data, str):
        return ox.decode_html(data)
    if isinstance(data, list):
        return [decode_html_data(item) for item in data]
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict's size never
        # changes while it is being iterated.
        for name in data:
            data[name] = decode_html_data(data[name])
    return data
|
||||
|
|
Loading…
Reference in a new issue