Pass the freshly fetched film count into update_year()/update_month() and
record it in film_counts only after the corresponding update has run.
Only print 'no movies' and call delete_url() for years after 1900.

Review fix: update_month() no longer defines month_total, so the mismatch
warning must format month_count instead (it raised NameError before).

NOTE(review): this patch was recovered from a whitespace-collapsed copy;
the indentation below is reconstructed from the code's logic. Verify it
against the target file before applying. The post-image hash on the index
line predates the review fix.

diff --git a/oxdata/movie/imdbids.py b/oxdata/movie/imdbids.py
index 232309f..e573dac 100644
--- a/oxdata/movie/imdbids.py
+++ b/oxdata/movie/imdbids.py
@@ -66,8 +66,9 @@ def get_film_count(year, month=None, day=None):
     total = re.compile('([\d,]+) titles.', re.DOTALL).findall(data)
     if total:
         return int(total[0].replace(',', ''))
-    print('no movies', url)
-    ox.web.imdb.delete_url(url)
+    if year > 1900:
+        print('no movies', url)
+        ox.web.imdb.delete_url(url)
     return 0
 
 def get_path(name):
@@ -83,24 +84,23 @@ def print_info(key, film_count, film_counts):
         extra = ''
     print('update', key, 'now has', film_count, 'films', extra)
 
-def update_year(year, film_counts):
-    key = '%s' % year
-    if film_counts[key] > MAX_PER_RANGE:
+def update_year(year, year_count, film_counts):
+    if year_count > MAX_PER_RANGE:
         for month in range(1, 13):
             key = '%04d-%02d' % (year, month)
             film_count = get_film_count(year, month)
             if film_count != film_counts.get(key):
                 print_info(key, film_count, film_counts)
+                update_month(year, month, film_count, film_counts)
                 film_counts[key] = film_count
-                update_month(year, month, film_counts)
     else:
         r = update_ids(year)
-        save_film_counts(film_counts)
+    film_counts['%s' % year] = year_count
+    save_film_counts(film_counts)
 
-def update_month(year, month, film_counts):
+def update_month(year, month, month_count, film_counts):
     key = '%04d-%02d' % (year, month)
-    if film_counts[key] > MAX_PER_RANGE:
-        month_total = film_counts[key]
+    if month_count > MAX_PER_RANGE:
         days_total = 0
         days = calendar.monthrange(year, month)[1]
         for day in range(1, days + 1):
@@ -109,14 +109,16 @@ def update_month(year, month, film_counts):
             days_total += film_count
             if film_count != film_counts.get(key):
                 print_info(key, film_count, film_counts)
-                film_counts[key] = film_count
                 r = update_ids(year, month, day, expected=film_count)
+                film_counts[key] = film_count
                 save_film_counts(film_counts)
-        if days_total != month_total:
-            print('!! month and days don\'t add up: %s month vs %s days total' % (month_total, days_total))
+        if days_total != month_count:
+            print('!! month and days don\'t add up: %s month vs %s days total' % (month_count, days_total))
     else:
         r = update_ids(year, month)
-        save_film_counts(film_counts)
+    key = '%04d-%02d' % (year, month)
+    film_counts[key] = month_count
+    save_film_counts(film_counts)
 
 def parse_cast(string):
     results = {}
@@ -263,8 +265,7 @@ def update_index(from_year=None):
         key = '%s' % year
         if film_count != film_counts.get(key):
             print_info(key, film_count, film_counts)
-            film_counts[key] = film_count
-            update_year(year, film_counts)
+            update_year(year, film_count, film_counts)
             save_film_counts(film_counts)
 
 def get_unknown_ids(known_ids):