import text not html

This commit is contained in:
j 2016-01-08 15:52:07 +05:30
parent 97006b0b3e
commit 84c15c690a
3 changed files with 15 additions and 1 deletions

View file

@ -13,6 +13,8 @@ from . import worldcat
from . import google from . import google
from . import duckduckgo from . import duckduckgo
from .utils import decode_html_data
from oml import settings from oml import settings
import logging import logging
@ -95,6 +97,7 @@ def lookup(key, value):
for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]: for key in [k['id'] for k in settings.config['itemKeys'] if isinstance(k['type'], list)]:
if key in data and not isinstance(data[key], list): if key in data and not isinstance(data[key], list):
data[key] = [data[key]] data[key] = [data[key]]
data = decode_html_data(data)
return data return data
def isvalid_id(key, value): def isvalid_id(key, value):

View file

@ -6,7 +6,7 @@ from ox.cache import get_json, store
import ox.web.google import ox.web.google
import stdnum.isbn import stdnum.isbn
from .utils import find_isbns, get_language from .utils import find_isbns, get_language, decode_html_data
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -84,5 +84,6 @@ def info(key, value):
data['publisher'] = [data['publisher']] data['publisher'] = [data['publisher']]
if 'language' in _data: if 'language' in _data:
data['language'] = [get_language(_data['language'])] data['language'] = [get_language(_data['language'])]
data = decode_html_data(data)
return data return data

View file

@ -24,3 +24,13 @@ def find_isbns(text):
def get_language(lang):
    """Look up *lang* through ``ox.iso.codeToLang``.

    Only the part of *lang* before the first ``-`` is passed to the
    lookup. When the lookup result is falsy, *lang* itself is returned
    unchanged.
    """
    primary = lang.split('-')[0]
    resolved = ox.iso.codeToLang(primary)
    return resolved or lang
def decode_html_data(data):
    """Recursively pass every string found in *data* through ``ox.decode_html``.

    * ``dict``: each value is replaced, in place, by its processed form and
      the same dict object is returned.
    * ``list``: a new list of processed items is returned.
    * ``str``: the result of ``ox.decode_html(data)`` is returned.
    * anything else is returned unchanged.
    """
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict never changes size
        # while it is being iterated.
        for name, item in data.items():
            data[name] = decode_html_data(item)
        return data
    if isinstance(data, list):
        return [decode_html_data(item) for item in data]
    if isinstance(data, str):
        return ox.decode_html(data)
    return data