lastId has to be a string; cache kg files for about a day (23 hours)
This commit is contained in:
parent
8e2565ccf8
commit
c0d9627e69
1 changed file with 3 additions and 3 deletions
|
@@ -78,6 +78,7 @@ def getId(url):
|
||||||
return url.split("=")[-1]
|
return url.split("=")[-1]
|
||||||
|
|
||||||
def getIds(lastId = 20):
|
def getIds(lastId = 20):
|
||||||
|
lastId = '%s' % lastId
|
||||||
ids = []
|
ids = []
|
||||||
page = 0
|
page = 0
|
||||||
while True:
|
while True:
|
||||||
|
@@ -86,14 +87,13 @@ def getIds(lastId = 20):
|
||||||
ids.append(id)
|
ids.append(id)
|
||||||
if lastId in ids:
|
if lastId in ids:
|
||||||
break
|
break
|
||||||
print page, len(ids)
|
|
||||||
page += 1
|
page += 1
|
||||||
return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
|
return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
|
||||||
|
|
||||||
def getIdsByPage(page):
|
def getIdsByPage(page):
|
||||||
ids = []
|
ids = []
|
||||||
url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
|
url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
|
||||||
html = getUrlUnicode(url, timeout = -1)
|
html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day
|
||||||
strings = html.split('<td width="42" style="padding:0px;">')
|
strings = html.split('<td width="42" style="padding:0px;">')
|
||||||
strings.pop(0)
|
strings.pop(0)
|
||||||
for string in strings:
|
for string in strings:
|
||||||
|
|
Loading…
Reference in a new issue