impawards every hour is all I want; kg should only allow imdbIds with digits, nothing else

This commit is contained in:
j 2009-08-19 19:53:19 +02:00
parent 0600d39198
commit ddae01323a
2 changed files with 2 additions and 2 deletions

View file

@ -55,7 +55,7 @@ def getId(url):
def getIds(): def getIds():
ids = [] ids = []
html = getUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 0) html = getUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
pages = int(findRe(html, '<a href= page(.*?).html>')) + 1 pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
for page in range(pages, 0, -1): for page in range(pages, 0, -1):
for id in getIdsByPage(page): for id in getIdsByPage(page):

View file

@ -38,7 +38,7 @@ def getData(id):
if string: if string:
data['genre'].append(string) data['genre'].append(string)
data['id'] = id data['id'] = id
data['imdbId'] = findRe(html, 'imdb.com/title/tt(.*?)/?"') data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
data['language'] = stripTags(parseTable(html, 'Language')) data['language'] = stripTags(parseTable(html, 'Language'))
data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)')) data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)'))
data['link'] = stripTags(parseTable(html, 'Internet Link')) data['link'] = stripTags(parseTable(html, 'Internet Link'))