more criterion fixes
This commit is contained in:
parent
228ec9dbd2
commit
bd64a2dba3
1 changed file with 5 additions and 4 deletions
|
@@ -33,7 +33,8 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
||||||
try:
|
try:
|
||||||
html = read_url(data["url"], timeout=timeout, unicode=True)
|
html = read_url(data["url"], timeout=timeout, unicode=True)
|
||||||
except:
|
except:
|
||||||
html = ox.cache.read_url(data["url"], timeout=timeout)
|
html = read_url(data["url"], timeout=timeout).decode('utf-8', 'ignore')
|
||||||
|
|
||||||
data["number"] = find_re(html, "<b>Spine #(\d+)")
|
data["number"] = find_re(html, "<b>Spine #(\d+)")
|
||||||
|
|
||||||
data["title"] = decode_html(find_re(html, "<h1 class=\"header__primarytitle\".*?>(.*?)</h1>"))
|
data["title"] = decode_html(find_re(html, "<h1 class=\"header__primarytitle\".*?>(.*?)</h1>"))
|
||||||
|
@@ -42,11 +43,11 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
||||||
info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results)
|
info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results)
|
||||||
info = {k: strip_tags(v).strip() for k, v in info}
|
info = {k: strip_tags(v).strip() for k, v in info}
|
||||||
if 'director' in info:
|
if 'director' in info:
|
||||||
data['director'] =info['director']
|
data['director'] = info['director']
|
||||||
if 'countryOfOrigin' in info:
|
if 'countryOfOrigin' in info:
|
||||||
data['country'] =info['countryOfOrigin']
|
data['country'] = [c.strip() for c in decode_html(info['countryOfOrigin']).split(', ')]
|
||||||
if 'inLanguage' in info:
|
if 'inLanguage' in info:
|
||||||
data['language'] =info['inLanguage']
|
data['language'] = [l.strip() for l in decode_html(info['inLanguage']).split(', ')]
|
||||||
for v in re.compile('<li>(.*?)</li>', re.DOTALL).findall(results):
|
for v in re.compile('<li>(.*?)</li>', re.DOTALL).findall(results):
|
||||||
if 'datePublished' in v:
|
if 'datePublished' in v:
|
||||||
data['year'] = strip_tags(v).strip()
|
data['year'] = strip_tags(v).strip()
|
||||||
|
|
Loading…
Reference in a new issue