fix criterion parser

This commit is contained in:
j 2021-08-07 11:30:23 +02:00
parent 887760acc1
commit 2172bcb3fb

View file

@ -43,8 +43,12 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
results = find_re(html, '<ul class="film-meta-list">(.*?)</ul>') results = find_re(html, '<ul class="film-meta-list">(.*?)</ul>')
info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results) info = re.compile('<li itemprop="(.*?)".*?>(.*?)</li>', re.DOTALL).findall(results)
info = {k: strip_tags(v).strip() for k, v in info} info = {k: strip_tags(v).strip() for k, v in info}
meta = re.compile('<meta.*? name="(.*?)".*? content="(.*?)"', re.DOTALL).findall(html)
meta = {k: v.strip() for k, v in meta}
if 'director' in info: if 'director' in info:
data['director'] = info['director'] data['director'] = info['director']
elif 'director' in meta:
data['director'] = meta['director']
if 'countryOfOrigin' in info: if 'countryOfOrigin' in info:
data['country'] = [c.strip() for c in decode_html(info['countryOfOrigin']).split(', ')] data['country'] = [c.strip() for c in decode_html(info['countryOfOrigin']).split(', ')]
if 'inLanguage' in info: if 'inLanguage' in info:
@ -80,7 +84,8 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
for poster in re.compile('<img src="(.*?)"').findall(posters): for poster in re.compile('<img src="(.*?)"').findall(posters):
data['posters'].append(poster) data['posters'].append(poster)
result = find_re(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"") result = re.compile('<div class="gallery-item ">.*?src="(.*?)"', re.DOTALL).findall(html)
#result = find_re(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
if result: if result:
data["stills"] = [result] data["stills"] = [result]
data["trailers"] = [] data["trailers"] = []