lastId has to be a string; cache kg files for 23 hours (so new ids are fetched about once per day)

This commit is contained in:
j 2009-07-15 16:10:33 +02:00
parent 8e2565ccf8
commit c0d9627e69

View file

@ -78,6 +78,7 @@ def getId(url):
return url.split("=")[-1] return url.split("=")[-1]
def getIds(lastId = 20): def getIds(lastId = 20):
lastId = '%s' % lastId
ids = [] ids = []
page = 0 page = 0
while True: while True:
@ -86,14 +87,13 @@ def getIds(lastId = 20):
ids.append(id) ids.append(id)
if lastId in ids: if lastId in ids:
break break
print page, len(ids)
page += 1 page += 1
return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids)))) return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
def getIdsByPage(page): def getIdsByPage(page):
ids = [] ids = []
url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
html = getUrlUnicode(url, timeout = -1) html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
strings = html.split('<td width="42" style="padding:0px;">') strings = html.split('<td width="42" style="padding:0px;">')
strings.pop(0) strings.pop(0)
for string in strings: for string in strings: