From 77f34143f506cd3041317034d2b72d4052309d5b Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Sun, 2 Aug 2015 15:58:59 +0200 Subject: [PATCH] criterion: decode some html --- ox/web/criterion.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ox/web/criterion.py b/ox/web/criterion.py index 93636d7..2e81ce1 100644 --- a/ox/web/criterion.py +++ b/ox/web/criterion.py @@ -5,7 +5,7 @@ import re import ox.cache from ox.cache import read_url -from ox.html import strip_tags +from ox.html import strip_tags, decode_html from ox.text import find_re import imdb @@ -36,14 +36,15 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): html = ox.cache.read_url(data["url"], timeout=timeout) data["number"] = find_re(html, "
(.*?)
")) + data["synopsis"] = decode_html(strip_tags(find_re(html, + "(.*?)
"))) result = find_re(html, "