py3 (one more): decode the IMDb sitemap response as UTF-8 before regex matching

This commit is contained in:
j 2016-09-19 10:56:23 +02:00
parent fc1efbc7fe
commit a569629373

View file

@ -242,7 +242,7 @@ def get_new_ids(timeout=-1):
else: else:
robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout).decode('utf-8') robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout).decode('utf-8')
sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0] sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
sitemap = ox.cache.read_url(sitemap_url, timeout=timeout) sitemap = ox.cache.read_url(sitemap_url, timeout=timeout).decode('utf-8')
urls = re.compile('<loc>(.+?)</loc>').findall(sitemap) urls = re.compile('<loc>(.+?)</loc>').findall(sitemap)
ids = set() ids = set()
for url in sorted(urls, reverse=False): for url in sorted(urls, reverse=False):