diff --git a/ox/web/criterion.py b/ox/web/criterion.py index 93636d7..2e81ce1 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -5,7 +5,7 @@ import re import ox.cache from ox.cache import read_url -from ox.html import strip_tags +from ox.html import strip_tags, decode_html from ox.text import find_re import imdb @@ -36,14 +36,15 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): html = ox.cache.read_url(data["url"], timeout=timeout) data["number"] = find_re(html, "
  • Spine #(\d+)") - data["title"] = find_re(html, "

    (.*?)

    ") - data["title"] = data["title"].split(u' \u2014 The Television Version')[0] + data["title"] = decode_html(find_re(html, "

    (.*?)

    ")) + data["title"] = data["title"].split(u' \u2014 The Television Version')[0].strip() data["director"] = strip_tags(find_re(html, "

    (.*?)

    ")) results = find_re(html, '
    (.*?)
    ') results = re.compile("
  • (.*?)
  • ").findall(results) data["country"] = results[0] data["year"] = results[1] - data["synopsis"] = strip_tags(find_re(html, "
    .*?

    (.*?)

    ")) + data["synopsis"] = decode_html(strip_tags(find_re(html, + "
    .*?

    (.*?)

    "))) result = find_re(html, "
    (.*?)
    ") if 'Blu-Ray' in result or 'Essential Art House DVD' in result: