py3 (one more)
This commit is contained in:
parent
fc1efbc7fe
commit
a569629373
1 changed file with 1 addition and 1 deletion
|
@@ -242,7 +242,7 @@ def get_new_ids(timeout=-1):
|
||||||
else:
|
else:
|
||||||
robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout).decode('utf-8')
|
robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout).decode('utf-8')
|
||||||
sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
|
sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
|
||||||
sitemap = ox.cache.read_url(sitemap_url, timeout=timeout)
|
sitemap = ox.cache.read_url(sitemap_url, timeout=timeout).decode('utf-8')
|
||||||
urls = re.compile('<loc>(.+?)</loc>').findall(sitemap)
|
urls = re.compile('<loc>(.+?)</loc>').findall(sitemap)
|
||||||
ids = set()
|
ids = set()
|
||||||
for url in sorted(urls, reverse=False):
|
for url in sorted(urls, reverse=False):
|
||||||
|
|
Loading…
Reference in a new issue