more criterion fixes

This commit is contained in:
j 2018-05-09 12:09:13 +01:00
parent 228ec9dbd2
commit bd64a2dba3

View file

@@ -33,7 +33,8 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
try:
html = read_url(data["url"], timeout=timeout, unicode=True)
except:
html = ox.cache.read_url(data["url"], timeout=timeout)
html = read_url(data["url"], timeout=timeout).decode('utf-8', 'ignore')
data["number"] = find_re(html, "<b>Spine #(\d+)")
data["title"] = decode_html(find_re(html, "<h1 class=\"header__primarytitle\".*?>(.*?)</h1>"))
@@ -42,11 +43,11 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results)
info = {k: strip_tags(v).strip() for k, v in info}
if 'director' in info:
data['director'] =info['director']
data['director'] = info['director']
if 'countryOfOrigin' in info:
data['country'] =info['countryOfOrigin']
data['country'] = [c.strip() for c in decode_html(info['countryOfOrigin']).split(', ')]
if 'inLanguage' in info:
data['language'] =info['inLanguage']
data['language'] = [l.strip() for l in decode_html(info['inLanguage']).split(', ')]
for v in re.compile('<li>(.*?)</li>', re.DOTALL).findall(results):
if 'datePublished' in v:
data['year'] = strip_tags(v).strip()