oxdata/oxdata/movie/models.py
2011-10-14 14:55:45 +02:00

98 lines
3.5 KiB
Python

# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from django.db import models
import ox
def find(info):
    """Return the one Imdb row that matches ``info``, or None.

    Every name in ``Imdb.keys`` that is present in ``info`` with a truthy
    value narrows the query:

    * strings are matched case-insensitively (``__iexact``),
    * lists are joined with newlines plus a trailing newline and matched
      exactly,
    * any other value is matched exactly.

    None is returned unless exactly one row is left after filtering.
    """
    matches = Imdb.objects.all()
    for name in Imdb.keys:
        if name not in info:
            continue
        wanted = info[name]
        if not wanted:
            continue
        if isinstance(wanted, basestring):
            lookup = '%s__iexact' % name
        else:
            lookup = name
        if isinstance(wanted, list):
            wanted = '\n'.join(wanted) + '\n'
        matches = matches.filter(**{lookup: wanted})
    return matches[0] if matches.count() == 1 else None
class Imdb(models.Model):
    """Stored metadata for one title, identified by its 7-character imdb id.

    List values (``director``, ``episodeDirector``) are stored as a single
    string with one entry per line, each entry followed by a newline.
    """
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    imdb = models.CharField(max_length=7, unique=True)
    title = models.CharField(max_length=1000, blank=True, default='')
    year = models.CharField(max_length=4, blank=True, default='')
    director = models.CharField(max_length=9000, blank=True, default='')

    season = models.IntegerField(blank=True, null=True)
    episode = models.IntegerField(blank=True, null=True)
    episodeTitle = models.CharField(max_length=1000, blank=True, default='')
    episodeYear = models.CharField(max_length=4, blank=True, default='')
    episodeDirector = models.CharField(max_length=1000, blank=True, default='')

    def __unicode__(self):
        return u"%s (%s)" % (self.title, self.imdb)

    # Attribute names that update() fills in and json() exports.
    keys = ('title', 'director', 'year', 'season', 'episode',
            'episodeTitle', 'episodeYear', 'episodeDirector')

    def update(self):
        """Refresh the fields listed in ``keys`` from ``ox.web.imdb`` and save.

        NOTE(review): the reviewed source had lost its indentation; the
        season/episode normalisation and the save() are assumed to belong
        to the ``if info`` branch, i.e. nothing is written when the lookup
        returns nothing -- confirm against the repository.
        """
        info = ox.web.imdb.ImdbCombined(self.imdb)
        if not info:
            return
        # Model attribute -> key used in the fetched data; attributes that
        # are not listed use the same name on both sides.
        info_keys = {
            'director': 'directors',
            'episodeTitle': 'episode_title',
            'episodeYear': 'episode_year',
            'episodeDirector': 'episode_directors',
        }
        for key in self.keys:
            ikey = info_keys.get(key, key)
            if ikey not in info:
                continue
            value = info[ikey]
            # A series_title in the data takes precedence over title.
            if ikey == 'title' and 'series_title' in info:
                value = info['series_title']
            if isinstance(value, list):
                value = '\n'.join(value) + '\n'
            setattr(self, key, value)
        # Negative season/episode numbers are stored as NULL.  The explicit
        # None check avoids ordering None against an int, which only works
        # by accident on Python 2 and raises TypeError on Python 3.
        for field in ('season', 'episode'):
            number = getattr(self, field)
            if number is not None and number < 0:
                setattr(self, field, None)
        self.save()

    def json(self):
        """Return the non-empty fields as a dict.

        The id is exported as ``imdbId``; ``director`` and
        ``episodeDirector`` are split back into lists of lines.  Keys whose
        value is falsy are left out.
        """
        j = {'imdbId': self.imdb}
        for key in self.keys:
            j[key] = getattr(self, key)
        for key in ('director', 'episodeDirector'):
            names = j[key].strip()
            if names:
                j[key] = names.split('\n')
            else:
                del j[key]
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating over it directly raises RuntimeError on Python 3.
        for key in list(j):
            if not j[key]:
                del j[key]
        return j
def get_new_ids(timeout=-1):
    """Create an Imdb row for every title id in IMDb's sitemaps not yet stored.

    The sitemap index URL is taken from the first ``Sitemap:`` line of
    robots.txt.  Each sitemap listed in that index is fetched (in reverse
    sorted URL order) and scanned for ``/title/tt<7 digits>/combined``
    links; ids that get newly created are populated via ``Imdb.update()``.

    ``timeout`` is passed unchanged to every ``ox.cache.readUrl`` call.

    Raises IndexError when robots.txt contains no ``Sitemap:`` line.
    """
    robot = ox.cache.readUrl('http://www.imdb.com/robots.txt', timeout=timeout)
    sitemap_url = re.compile(r'\nSitemap: (http.+)').findall(robot)[0]
    sitemap = ox.cache.readUrl(sitemap_url, timeout=timeout)
    urls = re.compile(r'<loc>(.+?)</loc>').findall(sitemap)
    # Compiled once for all sitemaps; raw string so \d is a regex escape and
    # the dots of the host name are matched literally.
    title_id = re.compile(
        r'<loc>http://www\.imdb\.com/title/tt(\d{7})/combined</loc>')
    for url in sorted(urls, reverse=True):
        # Single-argument form prints the same on Python 2 and Python 3.
        print(url)
        s = ox.cache.readUrl(url, timeout=timeout)
        for i in title_id.findall(s):
            m, created = Imdb.objects.get_or_create(imdb=i)
            if created:
                m.update()