fix imdb id scanner

This commit is contained in:
j 2019-08-02 14:35:44 +02:00
parent 6b12cf24af
commit d5d45ad681

View file

@@ -160,19 +160,20 @@ def update_ids(year, month=None, day=None, sort=None):
for url in urls: for url in urls:
data = read_url(url, timeout=TIMEOUT) data = read_url(url, timeout=TIMEOUT)
n = True n = True
page = 2 step = 50
start = 1
while n: while n:
n = re.compile('Next &#187;</a>', re.DOTALL).findall(data) n = re.compile('Next &#187;</a>', re.DOTALL).findall(data)
if n: if n:
n = '%s&page=%s' % (url, page) n = '%s&start=%s' % (url, start)
page += 1 start += step
doc = lxml.html.fromstring(data) doc = lxml.html.fromstring(data)
article = doc.find_class('article') article = doc.find_class('article')
if article: if article:
article = article[0] article = article[0]
else: else:
print('no article on', '%s&page=%s' % (url, page-2)) print('no article on', '%s&start=%s' % (url, start - 2*step))
ox.web.imdb.delete_url('%s&page=%s' % (url, page-2)) ox.web.imdb.delete_url('%s&start=%s' % (url, start - 2*step))
break break
for content in article.find_class('lister-item-content'): for content in article.find_class('lister-item-content'):
header = content.find_class('lister-item-header')[0] header = content.find_class('lister-item-header')[0]