criterion ids
This commit is contained in:
parent
55b9e8bd8f
commit
80cb6ac7fb
1 changed file with 8 additions and 15 deletions
|
@@ -96,23 +96,16 @@ def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
|
|||
|
||||
def get_ids(page=None):
    """Return the ids of all films linked from the Criterion shop browse list.

    Fetches the browse list sorted by spine number, collects the numeric id
    from every ``films/<id>-`` link on it, then follows every ``boxsets/...``
    link found on that same page and collects the film ids from each box set
    page as well.

    Args:
        page: Unused. Retained so callers that still pass a page number keep
            working; only the single browse-list URL is fetched.

    Returns:
        A list of unique id strings, sorted by their integer value.
    """
    # Raw strings keep ``\d`` a regex escape rather than an (invalid) string
    # escape; compiling once avoids rebuilding the pattern per box set.
    film_id = re.compile(r"films/(\d+)-")
    boxset_link = re.compile(r'boxsets/(.*?)"')

    html = read_url("https://www.criterion.com/shop/browse/list?sort=spine_number", unicode=True)
    ids = film_id.findall(html)

    # Box sets are linked from the browse list; their member films are only
    # listed on the individual box set pages.
    for boxset in boxset_link.findall(html):
        boxset_html = read_url("http://www.criterion.com/boxsets/" + boxset, unicode=True)
        ids += film_id.findall(boxset_html)

    # Ids are strings, so sort numerically rather than lexicographically.
    return sorted(set(ids), key=int)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: fetch the id list and dump it to stdout.
    all_ids = get_ids()
    print(all_ids)
|
||||
|
|
Loading…
Reference in a new issue