diff --git a/oxweb/karagarga.py b/oxweb/karagarga.py index 1678e69..bfdefbb 100644 --- a/oxweb/karagarga.py +++ b/oxweb/karagarga.py @@ -78,6 +78,7 @@ def getId(url): return url.split("=")[-1] def getIds(lastId = 20): + lastId = '%s' % lastId ids = [] page = 0 while True: @@ -86,14 +87,13 @@ def getIds(lastId = 20): ids.append(id) if lastId in ids: break - print page, len(ids) page += 1 return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids)))) def getIdsByPage(page): ids = [] url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page - html = getUrlUnicode(url, timeout = -1) + html = getUrlUnicode(url, timeout = 23*60*60) #get new ids once per day strings = html.split('') strings.pop(0) for string in strings: @@ -111,4 +111,4 @@ def parseTable(html, title): if __name__ == "__main__": print getIds("79317") - print getData("79317") \ No newline at end of file + print getData("79317")