criterion is https now

This commit is contained in:
j 2018-07-09 15:20:00 +02:00
parent 17deba03f2
commit d21b0b675c

View file

@@ -14,7 +14,7 @@ def get_id(url):
return url.split("/")[-1] return url.split("/")[-1]
def get_url(id): def get_url(id):
return "http://www.criterion.com/films/%s" % id return "https://www.criterion.com/films/%s" % id
def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False): def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
''' '''
@@ -67,7 +67,7 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
data["posters"] = [result] data["posters"] = [result]
else: else:
html_ = read_url(result, unicode=True) html_ = read_url(result, unicode=True)
result = find_re(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id) result = find_re(html_, '//www.criterion.com/films/%s.*?">(.*?)</a>' % id)
result = find_re(result, "src=\"(.*?)\"") result = find_re(result, "src=\"(.*?)\"")
if result: if result:
data["posters"] = [result.replace("_w100", "")] data["posters"] = [result.replace("_w100", "")]
@@ -102,7 +102,7 @@ def get_ids(page=None):
ids += results ids += results
results = re.compile("boxsets/(.*?)\"").findall(html) results = re.compile("boxsets/(.*?)\"").findall(html)
for result in results: for result in results:
html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True) html = read_url("https://www.criterion.com/boxsets/" + result, unicode=True)
results = re.compile("films/(\d+)-").findall(html) results = re.compile("films/(\d+)-").findall(html)
ids += results ids += results
return sorted(set(ids), key=int) return sorted(set(ids), key=int)