Compare commits
No commits in common. "501fe8cd3e1bd33b29eac6ffce85b4404eb46b61" and "fe06a8c6645396f6c6149b896f9509e3c556ef7d" have entirely different histories.
501fe8cd3e
...
fe06a8c664
3 changed files with 4 additions and 54 deletions
|
|
@@ -43,7 +43,7 @@ def get_ids():
|
|||
print('missing impawards', ox.web.impawards.get_url(id))
|
||||
|
||||
for id in ox.web.criterion.get_ids():
|
||||
if id in ('626', '835', '1079', '28907'):
|
||||
if id in ('626', '835'):
|
||||
continue
|
||||
if models.MovieId.objects.all().filter(criterion_id=id).count() == 0:
|
||||
print('criterion', id)
|
||||
|
|
|
|||
|
|
@@ -46,8 +46,6 @@ def get_film_count(year, month=None, day=None):
|
|||
url = get_year(year)
|
||||
data = ox.web.imdb.read_url(url, unicode=True, timeout=TIMEOUT)
|
||||
total = re.compile('<span class="lister-current-last-item">50</span>.*?of (.*?) titles', re.DOTALL).findall(data)
|
||||
if not total:
|
||||
total = re.compile(' ([\d+,]+) titles\n', re.DOTALL).findall(data)
|
||||
if total:
|
||||
return int(total[0].replace(',', ''))
|
||||
print('no movies', url)
|
||||
|
|
@@ -116,19 +114,6 @@ def update_month(year, month, film_counts):
|
|||
print('%s: count %s, got ids %s' % (key, film_counts[key], r))
|
||||
save_film_counts(film_counts)
|
||||
|
||||
def parse_cast(string):
    """Parse a '|'-separated credits string into a dict of name lists.

    Each '|'-delimited part is read as alternating labels and values, where
    a label ends with a colon at the end of a line and its value follows on
    the next line(s). Labels are lower-cased; the singular labels
    'director' and 'star' are stored under 'directors' and 'stars'.

    A value is split into names wherever a comma is followed by a line
    break, with any amount of surrounding whitespace. A comma that is not
    followed by a line break stays part of the name. Empty names are
    dropped, so a label without a value maps to an empty list.

    Args:
        string: text holding the credits, parts separated by '|'.

    Returns:
        dict mapping each normalized label to its list of names. If the
        same label occurs more than once, the last occurrence wins.
    """
    # Built once instead of once per label.
    plural_keys = {
        'director': 'directors',
        'star': 'stars',
    }
    results = {}
    for part in string.split('|'):
        fields = iter([t.strip() for t in re.split(r':[ \t]*\n', part)])
        # Zipping the iterator with itself pairs label, value, label, value...
        # A trailing label without a value is dropped.
        for label, value in zip(fields, fields):
            key = label.lower()
            key = plural_keys.get(key, key)
            names = re.split(r'\s*,\s*\n\s*', value)
            results[key] = [name for name in names if name]
    return results
|
||||
|
||||
def update_ids(year, month=None, day=None, sort=None):
|
||||
films = {}
|
||||
|
|
@@ -164,20 +149,10 @@ def update_ids(year, month=None, day=None, sort=None):
|
|||
else:
|
||||
print('no article on', '%s&page=%s' % (url, page-2))
|
||||
break
|
||||
for content in article.find_class('lister-item-content'):
|
||||
header = content.find_class('lister-item-header')[0]
|
||||
a = header.xpath('.//a')
|
||||
if 'Episode:' in [
|
||||
e.text_content()
|
||||
for e in header.xpath(".//small")
|
||||
] and len(a) > 1:
|
||||
title = a[0].text_content().strip() + ': '
|
||||
a = a[1]
|
||||
else:
|
||||
title = ''
|
||||
a = a[0]
|
||||
for header in article.find_class('lister-item-header'):
|
||||
a = header.xpath('.//a')[0]
|
||||
id = re.compile('title/tt(\d{7})').findall(a.attrib['href'])[0]
|
||||
title += a.text_content().strip()
|
||||
title = a.text_content().strip()
|
||||
try:
|
||||
y = header.find_class('lister-item-year')[0].text_content()
|
||||
y = re.sub('\([^\d]+\)', '', y)
|
||||
|
|
@@ -190,34 +165,11 @@ def update_ids(year, month=None, day=None, sort=None):
|
|||
print(n)
|
||||
print(header.find_class('lister-item-year')[0].text_content())
|
||||
raise
|
||||
|
||||
text = content.xpath(".//p[contains(@class, 'text-muted')]")
|
||||
plot = text[1].text_content().strip()
|
||||
plot = plot.replace('See full summary »', '').replace('See full summary\xa0»', '').strip()
|
||||
if plot == 'Add a Plot':
|
||||
plot = ''
|
||||
genre = content.find_class('genre')
|
||||
if genre:
|
||||
genre = genre[0].text_content().strip().split(', ')
|
||||
else:
|
||||
genre = []
|
||||
cast = content.xpath(".//p[contains(@class, '')]")
|
||||
cast = [t for t in cast if t.attrib.get('class') == '']
|
||||
if cast:
|
||||
cast = parse_cast(cast[0].text_content())
|
||||
|
||||
if id not in films:
|
||||
films[id] = {
|
||||
'title': title,
|
||||
'year': y
|
||||
}
|
||||
if plot:
|
||||
films[id]['plot'] = plot
|
||||
if genre:
|
||||
films[id]['genre'] = genre
|
||||
if cast:
|
||||
films[id].update(cast)
|
||||
|
||||
#print(key, len(films), 'films')
|
||||
if n:
|
||||
#print(n)
|
||||
|
|
|
|||
|
|
@@ -28,8 +28,6 @@ actions.register(getIds)
|
|||
def getData(request, data):
|
||||
response = json_response()
|
||||
id = data['id']
|
||||
if isinstance(id, int):
|
||||
id = str(id)
|
||||
if len(id) == 7:
|
||||
i, created = models.Imdb.objects.get_or_create(imdb=id)
|
||||
if created:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue