criterion ids
This commit is contained in:
parent
55b9e8bd8f
commit
80cb6ac7fb
1 changed files with 8 additions and 15 deletions
|
@ -96,23 +96,16 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
||||||
|
|
||||||
def get_ids(page=None):
|
def get_ids(page=None):
|
||||||
ids = []
|
ids = []
|
||||||
if page:
|
html = read_url("https://www.criterion.com/shop/browse/list?sort=spine_number", unicode=True)
|
||||||
url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
|
results = re.compile("films/(\d+)-").findall(html)
|
||||||
html = read_url(url, unicode=True)
|
ids += results
|
||||||
results = re.compile("films/(\d+)").findall(html)
|
results = re.compile("boxsets/(.*?)\"").findall(html)
|
||||||
|
for result in results:
|
||||||
|
html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True)
|
||||||
|
results = re.compile("films/(\d+)-").findall(html)
|
||||||
ids += results
|
ids += results
|
||||||
results = re.compile("boxsets/(.*?)\"").findall(html)
|
|
||||||
for result in results:
|
|
||||||
html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True)
|
|
||||||
results = re.compile("films/(\d+)").findall(html)
|
|
||||||
ids += results
|
|
||||||
return set(ids)
|
|
||||||
html = read_url("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine", unicode=True)
|
|
||||||
results = re.compile("\&p=(\d+)\&").findall(html)
|
|
||||||
pages = max(map(int, results))
|
|
||||||
for page in range(1, pages):
|
|
||||||
ids += get_ids(page)
|
|
||||||
return sorted(set(ids), key=int)
|
return sorted(set(ids), key=int)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(get_ids())
|
print(get_ids())
|
||||||
|
|
Loading…
Reference in a new issue