From bd64a2dba3f96b56bd8581257aa802100c4184be Mon Sep 17 00:00:00 2001 From: j Date: Wed, 9 May 2018 12:09:13 +0100 Subject: [PATCH] more criterion fixes --- ox/web/criterion.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ox/web/criterion.py b/ox/web/criterion.py index 67187fd..09cf212 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -33,7 +33,8 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): try: html = read_url(data["url"], timeout=timeout, unicode=True) except: - html = ox.cache.read_url(data["url"], timeout=timeout) + html = read_url(data["url"], timeout=timeout).decode('utf-8', 'ignore') + data["number"] = find_re(html, "Spine #(\d+)") data["title"] = decode_html(find_re(html, "

(.*?)

")) @@ -42,11 +43,11 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): info = re.compile('
  • (.*?)
  • ', re.DOTALL).findall(results) info = {k: strip_tags(v).strip() for k, v in info} if 'director' in info: - data['director'] =info['director'] + data['director'] = info['director'] if 'countryOfOrigin' in info: - data['country'] =info['countryOfOrigin'] + data['country'] = [c.strip() for c in decode_html(info['countryOfOrigin']).split(', ')] if 'inLanguage' in info: - data['language'] =info['inLanguage'] + data['language'] = [l.strip() for l in decode_html(info['inLanguage']).split(', ')] for v in re.compile('
  • (.*?)
  • ', re.DOTALL).findall(results): if 'datePublished' in v: data['year'] = strip_tags(v).strip()