From fc1efbc7fe72363bfa10a2ed80b1c978da137eb6 Mon Sep 17 00:00:00 2001
From: j
Date: Mon, 19 Sep 2016 10:55:14 +0200
Subject: [PATCH] py3

---
 oxdata/movie/models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/oxdata/movie/models.py b/oxdata/movie/models.py
index 3f4418c..05789e3 100644
--- a/oxdata/movie/models.py
+++ b/oxdata/movie/models.py
@@ -240,13 +240,13 @@ def get_new_ids(timeout=-1):
         with open(new_ids_cache) as fd:
             new_ids = set(json.load(fd))
     else:
-        robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout)
+        robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout).decode('utf-8')
         sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
         sitemap = ox.cache.read_url(sitemap_url, timeout=timeout)
         urls = re.compile('(.+?)').findall(sitemap)
         ids = set()
         for url in sorted(urls, reverse=False):
-            s = ox.cache.read_url(url, timeout=timeout)
+            s = ox.cache.read_url(url, timeout=timeout).decode('utf-8')
             ids |= set(re.compile('http://www.imdb.com/title/tt(\d{7})/combined').findall(s))
             #print url, len(ids)
         known_ids = frozenset([i['imdb'] for i in Imdb.objects.all().values('imdb')])