use modules for additional ids, load virtualenv in manage.py

This commit is contained in:
j 2010-08-05 13:47:19 +02:00
parent b817a10301
commit 03b484c261
7 changed files with 88 additions and 54 deletions

View file

@ -6,12 +6,12 @@ from django.conf import settings
from ox.cache import readUrlUnicode from ox.cache import readUrlUnicode
from ox import findRe from ox import findRe
import ox.web.criterion import ox.web.criterion
import ox.web.karagarga
import ox.web.imdb import ox.web.imdb
import ox.web.impawards import ox.web.impawards
import models import models
from oxdata.poster.models import PosterCache from oxdata.poster.models import PosterCache
import modules
def addPoster(m, url, site, site_id): def addPoster(m, url, site, site_id):
if PosterCache.objects.all().filter(url=url).count() == 0: if PosterCache.objects.all().filter(url=url).count() == 0:
@ -21,10 +21,11 @@ def addPoster(m, url, site, site_id):
def getIds(): def getIds():
for id in ox.web.impawards.getIds(): for id in ox.web.impawards.getIds():
if models.MovieId.objects.all().filter(impawards_id=id).count() == 0: if models.MovieId.objects.all().filter(impawards_id=id).count() == 0:
print 'impawards', id print 'impawards', ox.web.impawards.getUrl(id)
data = ox.web.impawards.getData(id) data = ox.web.impawards.getData(id)
if data and 'imdbId' in data: if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId']) m = models.getMovieIdByImdbId(data['imdbId'])
if m:
if not m.impawards_id: if not m.impawards_id:
m.impawards_id = id m.impawards_id = id
m.save() m.save()
@ -44,18 +45,5 @@ def getIds():
else: else:
print data['title'], "no imdbId" print data['title'], "no imdbId"
#kg modules.getIds.run()
lastId = models.Karagarga.maxId()
for id in ox.web.karagarga.getIds(lastId):
if models.Karagarga.objects.filter(karagarga_id=id).count() == 0:
print 'kg', id
data = ox.web.karagarga.getData(id)
if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId'])
kg = models.Karagarga()
kg.movie_id = m
kg.karagarga_id = id
kg.save()
for poster in data['posters']:
addPoster(m, poster, 'karagarga.net', kg.karagarga_id)

View file

@ -6,22 +6,31 @@ import hashlib
from django.db import models from django.db import models
from django.db.models import Q, Max from django.db.models import Q, Max
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.utils import simplejson
import ox
from ox import stripTags
import ox.web.imdb import ox.web.imdb
import ox.web.wikipedia import ox.web.wikipedia
from ox import stripTags
def getMovieIdByImdbId(imdb_id): def getMovieIdByImdbId(imdb_id):
#movies moved in imdb
imdb_id = {
'0377059': '0343663',
'0426560': '0088000',
}.get(imdb_id, imdb_id)
try: try:
m = MovieId.objects.get(imdb_id=imdb_id) m = MovieId.objects.get(imdb_id=imdb_id)
except MovieId.DoesNotExist: except MovieId.DoesNotExist:
#check if imdb_id actually exists on imdb
if ox.cache.exists('http://www.imdb.com/title/tt%s/combined'%imdb_id):
m = MovieId() m = MovieId()
m.imdb_id = imdb_id m.imdb_id = imdb_id
m.save() m.save()
#m.updateFromImdb() m.updateFromImdb()
else:
print imdb_id, "is not a valid id, failed loading movie"
return None
return m return m
class MovieId(models.Model): class MovieId(models.Model):
@ -117,13 +126,9 @@ class MovieId(models.Model):
value = getattr(self, key) value = getattr(self, key)
if value: if value:
json[key] = value json[key] = value
return simplejson.dumps(json, indent=4) if 'director' in json:
json['directors'] = json.pop('director').split(', ')
if 'year' in json and json['year']: json['year'] = int(json['year'])
return json
class Karagarga(models.Model):
movie_id = models.ForeignKey(MovieId, related_name='karagarga_ids', default=None)
karagarga_id = models.IntegerField(unique=True)
@classmethod
def maxId(cls):
return cls.objects.aggregate(Max('karagarga_id'))['karagarga_id__max']

15
oxdata/lookup/modules.py Normal file
View file

@ -0,0 +1,15 @@
class IdModules(object):
    """Registry of named callables that fetch additional ids.

    Modules register a zero-argument callable under a name; ``run()``
    then invokes every registered callable.
    """

    def __init__(self):
        # Maps a module name to the callable registered under it.
        self._registry = {}

    def register(self, name, getIds):
        """Register ``getIds`` under ``name``.

        The first registration for a name wins; a later registration
        with the same name is ignored.
        """
        if name not in self._registry:
            self._registry[name] = getIds

    def run(self):
        """Call every registered callable once.

        Iterates over a snapshot of the registered names so that a
        callable may register further modules while running without
        raising "dictionary changed size during iteration". Modules
        registered during a run are first called on the next run.
        """
        for name in list(self._registry):
            self._registry[name]()


# Shared registry instance used by the lookup code via modules.getIds.
getIds = IdModules()

View file

@ -7,8 +7,20 @@ from django.contrib.auth.models import User
from oxdjango.shortcuts import render_to_json_response from oxdjango.shortcuts import render_to_json_response
import models
def ids(request): def ids(request):
json = {} json = {}
movie = None
if 'imdb' in request.GET:
imdb_id = request.GET['imdb']
movie = models.getMovieIdByImdbId(imdb_id)
if 'criterion' in request.GET:
criterion_id = request.GET['criterion']
movie = models.MovieId.objects.get(criterion_id=criterion_id)
if movie:
movie.updateFromImdb()
json = movie.json()
return render_to_json_response(json) return render_to_json_response(json)
def urls(request): def urls(request):

View file

@ -1,4 +1,13 @@
#!/usr/bin/env python #!/usr/bin/env python
import os
root_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
os.chdir(root_dir)
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))
from django.core.management import execute_manager from django.core.management import execute_manager
try: try:
import settings # Assumed to be in the same directory. import settings # Assumed to be in the same directory.

View file

@ -10,16 +10,15 @@ from django.core.files.base import ContentFile
import ox.web.criterion import ox.web.criterion
import ox.web.movieposterdb import ox.web.movieposterdb
import ox.web.karagarga
import ox.web.imdb import ox.web.imdb
import ox.web.impawards import ox.web.impawards
from oxdata.lookup.models import MovieId, Karagarga from oxdata.lookup.models import MovieId
def getPosters(movie_id): def getPosters(movie_id):
getPosterUrls(movie_id) getPosterUrls(movie_id)
posters = {} posters = {}
for p in PosterCache.objects.all().filter(movie_id=movie_id): for p in PosterCache.objects.all().filter(movie_id=movie_id).order_by('id'):
if p.site not in posters: if p.site not in posters:
posters[p.site] = [] posters[p.site] = []
poster = p.get() poster = p.get()
@ -28,8 +27,9 @@ def getPosters(movie_id):
pjson['url'] = poster._get_url() pjson['url'] = poster._get_url()
pjson['width'] = poster.width pjson['width'] = poster.width
pjson['height'] = poster.height pjson['height'] = poster.height
if poster.width < poster.height:
posters[p.site].append(pjson) posters[p.site].append(pjson)
for p in posters: for p in posters.keys():
if not posters[p]: if not posters[p]:
del posters[p] del posters[p]
return posters return posters
@ -50,17 +50,28 @@ class PosterCache(models.Model):
url = models.CharField(max_length=1024) url = models.CharField(max_length=1024)
site = models.CharField(max_length=255) site = models.CharField(max_length=255)
site_id = models.CharField(max_length=42) site_id = models.CharField(max_length=42)
image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url, f)) image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url.encode('utf-8'), f))
status = models.CharField(max_length=1024, default='200')
failed = models.BooleanField(default=False) failed = models.BooleanField(default=False)
def get(self): def get(self):
if not self.image and not self.failed: if not self.image and not self.failed:
try:
import ox.net import ox.net
name = hashlib.sha1(self.url).hexdigest() url = self.url.encode('utf-8')
data = ox.net.readUrl(self.url) name = hashlib.sha1(url).hexdigest()
try:
data = ox.net.readUrl(url)
self.image.save(name, ContentFile(data)) self.image.save(name, ContentFile(data))
except: except ox.net.urllib2.HTTPError, e:
import traceback
print traceback.print_exc()
self.status = e.code
self.failed = True
self.save()
except ox.net.urllib2.URLError, e:
import traceback
print traceback.print_exc()
self.status = e.reason
self.failed = True self.failed = True
self.save() self.save()
return self.image return self.image
@ -110,13 +121,6 @@ def getPosterUrls(m):
for poster in data['posters']: for poster in data['posters']:
addPoster(poster, 'impawards.com', m.imdb_id) addPoster(poster, 'impawards.com', m.imdb_id)
for kg in Karagarga.objects.all().filter(movie_id=m):
data = ox.web.karagarga.getData(kg.karagarga_id)
if data:
for poster in data['posters']:
addPoster(poster, 'karagarga.net', kg.karagarga_id)
else:
kg.delete()
#fixme: get 0xdb still, possibly use kg or imdb still as fallback? #fixme: get 0xdb still, possibly imdb still as fallback?

View file

@ -93,6 +93,7 @@ INSTALLED_APPS = (
LOGIN_REDIRECT_URL='/' LOGIN_REDIRECT_URL='/'
#overwrite default settings with local settings #overwrite default settings with local settings
try: try:
from local_settings import * from local_settings import *