From 758acfe01b6418f42778d97fe4881858804a7581 Mon Sep 17 00:00:00 2001 From: j Date: Mon, 7 May 2018 09:42:15 +0100 Subject: [PATCH] some criterion fixes --- ox/web/criterion.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/ox/web/criterion.py b/ox/web/criterion.py index 2d5aa86..ba6541e 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -36,15 +36,24 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): html = ox.cache.read_url(data["url"], timeout=timeout) data["number"] = find_re(html, "
  • Spine #(\d+)") - data["title"] = decode_html(find_re(html, "

    (.*?)

    ")) + data["title"] = decode_html(find_re(html, "

    (.*?)

    ")) data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip() - data["director"] = strip_tags(find_re(html, "

    (.*?)

    ")) - results = find_re(html, '
    (.*?)
    ') - results = re.compile("
  • (.*?)
  • ").findall(results) - data["country"] = results[0] - data["year"] = results[1] + results = find_re(html, '') + info = re.compile('
  • (.*?)
  • ', re.DOTALL).findall(results) + info = {k: strip_tags(v).strip() for k, v in info} + if 'director' in info: + data['director'] =info['director'] + if 'countryOfOrigin' in info: + data['country'] =info['countryOfOrigin'] + if 'inLanguage' in info: + data['language'] =info['inLanguage'] + for v in re.compile('
  • (.*?)
  • ', re.DOTALL).findall(results): + if 'datePublished' in v: + data['year'] = strip_tags(v).strip() + elif 'duration' in v: + data['duration'] = strip_tags(v).strip() data["synopsis"] = decode_html(strip_tags(find_re(html, - "
    .*?

    (.*?)

    "))) + "
    .*?

    (.*?)

    "))) result = find_re(html, "
    (.*?)
    ") if 'Blu-Ray' in result or 'Essential Art House DVD' in result: @@ -63,6 +72,12 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): else: data["posters"] = [] data['posters'] = [re.sub('(\?\d+)$', '', p) for p in data['posters']] + data['posters'] = [p for p in data['posters'] if p] + + posters = find_re(html, '
    (.*?)
    ') + for poster in re.compile('