From 2050c9deae46dd95852e2d3ad04b8bbd70a95d32 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Sun, 18 Jul 2010 20:39:26 +0200
Subject: [PATCH] only get id from criterion

---
 ox/web/criterion.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ox/web/criterion.py b/ox/web/criterion.py
index c204360..4eb65bc 100644
--- a/ox/web/criterion.py
+++ b/ox/web/criterion.py
@@ -75,13 +75,13 @@ def getIds():
 def getIdsByPage(page):
     ids = []
     html = readUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
-    results = re.compile("films/(.*?)\"").findall(html)
+    results = re.compile("films/(\d+)").findall(html)
     for result in results:
         ids.append(result)
     results = re.compile("boxsets/(.*?)\"").findall(html)
     for result in results:
         html = readUrlUnicode("http://www.criterion.com/boxsets/" + result)
-        results = re.compile("films/(.*?)\"").findall(html)
+        results = re.compile("films/(\d+)").findall(html)
         for result in results:
             ids.append(result)
     return set(ids)