fix count parser

This commit is contained in:
j 2019-05-02 12:48:29 +02:00
parent 322213ad57
commit 893abbe9d5

View file

@ -30,7 +30,7 @@ def read_url(url, timeout):
return data
def get_range(from_, to):
    """Build the IMDb title-search URL for a release-date range.

    Args:
        from_: Start of the range; inserted verbatim as the first value of
            the ``release_date`` query parameter.
        to: End of the range; inserted verbatim as the second value of the
            ``release_date`` query parameter.

    Returns:
        The search URL as a string, sorted by ascending release date and
        requesting 50 results per page, with adult titles included.
    """
    # Only the HTTPS endpoint is kept; the earlier plain-HTTP assignment was
    # overwritten before use and therefore had no effect.
    base_url = 'https://www.imdb.com'
    url = (
        '%s/search/title?adult=include&release_date=%s,%s'
        '&sort=release_date,asc&count=50' % (base_url, from_, to)
    )
    return url
@ -54,9 +54,9 @@ def get_film_count(year, month=None, day=None):
else:
url = get_year(year)
data = read_url(url, timeout=TIMEOUT)
total = re.compile('<span class="lister-current-last-item">50</span>.*?of (.*?) titles', re.DOTALL).findall(data)
total = re.compile('<span>1-50 of ([\d,]+?) titles.</span>').findall(data)
if not total:
total = re.compile(' ([\d+,]+) titles\n', re.DOTALL).findall(data)
total = re.compile(' ([\d,]+) titles\n', re.DOTALL).findall(data)
if total:
return int(total[0].replace(',', ''))
print('no movies', url)