fix imdb id scanner

This commit is contained in:
j 2019-08-02 14:35:44 +02:00
parent 6b12cf24af
commit d5d45ad681
1 changed file with 6 additions and 5 deletions

View File

@@ -160,19 +160,20 @@ def update_ids(year, month=None, day=None, sort=None):
for url in urls:
data = read_url(url, timeout=TIMEOUT)
n = True
page = 2
step = 50
start = 1
while n:
n = re.compile('Next &#187;</a>', re.DOTALL).findall(data)
if n:
n = '%s&page=%s' % (url, page)
page += 1
n = '%s&start=%s' % (url, start)
start += step
doc = lxml.html.fromstring(data)
article = doc.find_class('article')
if article:
article = article[0]
else:
print('no article on', '%s&page=%s' % (url, page-2))
ox.web.imdb.delete_url('%s&page=%s' % (url, page-2))
print('no article on', '%s&start=%s' % (url, start - 2*step))
ox.web.imdb.delete_url('%s&start=%s' % (url, start - 2*step))
break
for content in article.find_class('lister-item-content'):
header = content.find_class('lister-item-header')[0]