update count after index was updated

This commit is contained in:
j 2019-08-05 19:42:08 +02:00
parent d47f1c6a61
commit ccff7572d0

View file

@ -66,8 +66,9 @@ def get_film_count(year, month=None, day=None):
total = re.compile('<span>([\d,]+) titles.</span>', re.DOTALL).findall(data) total = re.compile('<span>([\d,]+) titles.</span>', re.DOTALL).findall(data)
if total: if total:
return int(total[0].replace(',', '')) return int(total[0].replace(',', ''))
print('no movies', url) if year > 1900:
ox.web.imdb.delete_url(url) print('no movies', url)
ox.web.imdb.delete_url(url)
return 0 return 0
def get_path(name): def get_path(name):
@ -83,24 +84,23 @@ def print_info(key, film_count, film_counts):
extra = '' extra = ''
print('update', key, 'now has', film_count, 'films', extra) print('update', key, 'now has', film_count, 'films', extra)
def update_year(year, year_count, film_counts):
    """Update the index for one year, then record that year's film count.

    year        -- the year to update.
    year_count  -- the film count for the whole year; compared against
                   MAX_PER_RANGE and stored under the key '%s' % year.
    film_counts -- mapping of range key -> film count. It is modified in
                   place and handed to save_film_counts() at the end.
    """
    if year_count > MAX_PER_RANGE:
        # Too many films for a single year-wide range: go month by month
        # and only descend into months whose count differs from the
        # stored one.
        for month in range(1, 13):
            key = '%04d-%02d' % (year, month)
            film_count = get_film_count(year, month)
            if film_count != film_counts.get(key):
                print_info(key, film_count, film_counts)
                # update_month is given the fresh count and is expected to
                # store it itself -- NOTE(review): confirm against
                # update_month.
                update_month(year, month, film_count, film_counts)
    else:
        # The return value of update_ids is not used here.
        update_ids(year)
    # Store the year's count only after the work above has run; presumably
    # so that an interrupted run still sees a differing count next time
    # and retries this year -- TODO confirm against the caller.
    film_counts['%s' % year] = year_count
    save_film_counts(film_counts)
def update_month(year, month, film_counts): def update_month(year, month, month_count, film_counts):
key = '%04d-%02d' % (year, month) key = '%04d-%02d' % (year, month)
if film_counts[key] > MAX_PER_RANGE: if month_count > MAX_PER_RANGE:
month_total = film_counts[key]
days_total = 0 days_total = 0
days = calendar.monthrange(year, month)[1] days = calendar.monthrange(year, month)[1]
for day in range(1, days + 1): for day in range(1, days + 1):
@ -109,14 +109,16 @@ def update_month(year, month, film_counts):
days_total += film_count days_total += film_count
if film_count != film_counts.get(key): if film_count != film_counts.get(key):
print_info(key, film_count, film_counts) print_info(key, film_count, film_counts)
film_counts[key] = film_count
r = update_ids(year, month, day, expected=film_count) r = update_ids(year, month, day, expected=film_count)
film_counts[key] = film_count
save_film_counts(film_counts) save_film_counts(film_counts)
if days_total != month_total: if days_total != month_count:
print('!! month and days don\'t add up: %s month vs %s days total' % (month_total, days_total)) print('!! month and days don\'t add up: %s month vs %s days total' % (month_total, days_total))
else: else:
r = update_ids(year, month) r = update_ids(year, month)
save_film_counts(film_counts) key = '%04d-%02d' % (year, month)
film_counts[key] = month_count
save_film_counts(film_counts)
def parse_cast(string): def parse_cast(string):
results = {} results = {}
@ -263,8 +265,7 @@ def update_index(from_year=None):
key = '%s' % year key = '%s' % year
if film_count != film_counts.get(key): if film_count != film_counts.get(key):
print_info(key, film_count, film_counts) print_info(key, film_count, film_counts)
film_counts[key] = film_count update_year(year, film_count, film_counts)
update_year(year, film_counts)
save_film_counts(film_counts) save_film_counts(film_counts)
def get_unknown_ids(known_ids): def get_unknown_ids(known_ids):