only get id from criterion

This commit is contained in:
j 2010-07-18 20:39:26 +02:00
parent b3345523b4
commit 2050c9deae

View file

@ -75,13 +75,13 @@ def getIds():
def getIdsByPage(page):
    """Collect the numeric film ids linked from one Criterion library page.

    The listing page for ``page`` is fetched and scanned for ``films/<digits>``
    links.  Every ``boxsets/<name>"`` link found on that same page is then
    fetched as well and scanned with the same film-id pattern.

    Args:
        page: value interpolated into the ``page`` query parameter of the
            library listing URL.

    Returns:
        A ``set`` of the digit strings captured from ``films/<digits>`` links
        on the listing page and on each linked box-set page.
    """
    # Raw strings keep "\d" from being treated as a (invalid) string escape.
    # Compiled once here so the film pattern is not rebuilt per box set.
    film_id_re = re.compile(r"films/(\d+)")
    boxset_re = re.compile(r'boxsets/(.*?)"')

    ids = set()
    # NOTE(review): readUrlUnicode is defined outside this block; presumably
    # it returns the page body as text -- confirm against its definition.
    listing_html = readUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
    ids.update(film_id_re.findall(listing_html))

    # A box set may be linked more than once on the listing page; de-duplicate
    # so each box-set page is requested only once (sorted for a stable order).
    for boxset in sorted(set(boxset_re.findall(listing_html))):
        boxset_html = readUrlUnicode("http://www.criterion.com/boxsets/" + boxset)
        ids.update(film_id_re.findall(boxset_html))
    return ids