def decode_html_data(data):
    """Recursively decode HTML in every string contained in *data*.

    Strings are passed through ``ox.decode_html``. Lists are rebuilt as
    new lists with each element processed. Dicts are updated in place:
    each value is processed and stored back under its existing key, and
    the keys themselves are left untouched. Any other value is returned
    unchanged.

    Returns the processed value; for a dict this is the same object
    that was passed in.
    """
    if isinstance(data, str):
        return ox.decode_html(data)
    if isinstance(data, list):
        return [decode_html_data(item) for item in data]
    if isinstance(data, dict):
        # Only existing keys are reassigned, so the dict never changes
        # size while it is being iterated.
        for name, value in data.items():
            data[name] = decode_html_data(value)
    return data