use modules for additional ids, load virtualenv in manage.py

This commit is contained in:
j 2010-08-05 13:47:19 +02:00
parent b817a10301
commit 03b484c261
7 changed files with 88 additions and 54 deletions

View file

@ -6,12 +6,12 @@ from django.conf import settings
from ox.cache import readUrlUnicode
from ox import findRe
import ox.web.criterion
import ox.web.karagarga
import ox.web.imdb
import ox.web.impawards
import models
from oxdata.poster.models import PosterCache
import modules
def addPoster(m, url, site, site_id):
if PosterCache.objects.all().filter(url=url).count() == 0:
@ -21,10 +21,11 @@ def addPoster(m, url, site, site_id):
def getIds():
for id in ox.web.impawards.getIds():
if models.MovieId.objects.all().filter(impawards_id=id).count() == 0:
print 'impawards', id
print 'impawards', ox.web.impawards.getUrl(id)
data = ox.web.impawards.getData(id)
if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId'])
if m:
if not m.impawards_id:
m.impawards_id = id
m.save()
@ -44,18 +45,5 @@ def getIds():
else:
print data['title'], "no imdbId"
#kg
lastId = models.Karagarga.maxId()
for id in ox.web.karagarga.getIds(lastId):
if models.Karagarga.objects.filter(karagarga_id=id).count() == 0:
print 'kg', id
data = ox.web.karagarga.getData(id)
if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId'])
kg = models.Karagarga()
kg.movie_id = m
kg.karagarga_id = id
kg.save()
for poster in data['posters']:
addPoster(m, poster, 'karagarga.net', kg.karagarga_id)
modules.getIds.run()

View file

@ -6,22 +6,31 @@ import hashlib
from django.db import models
from django.db.models import Q, Max
from django.contrib.auth.models import User
from django.utils import simplejson
import ox
from ox import stripTags
import ox.web.imdb
import ox.web.wikipedia
from ox import stripTags
def getMovieIdByImdbId(imdb_id):
#movies moved in imdb
imdb_id = {
'0377059': '0343663',
'0426560': '0088000',
}.get(imdb_id, imdb_id)
try:
m = MovieId.objects.get(imdb_id=imdb_id)
except MovieId.DoesNotExist:
#check if imdb_id actually exists on imdb
if ox.cache.exists('http://www.imdb.com/title/tt%s/combined'%imdb_id):
m = MovieId()
m.imdb_id = imdb_id
m.save()
#m.updateFromImdb()
m.updateFromImdb()
else:
print imdb_id, "is not a valid id, failed loading movie"
return None
return m
class MovieId(models.Model):
@ -117,13 +126,9 @@ class MovieId(models.Model):
value = getattr(self, key)
if value:
json[key] = value
return simplejson.dumps(json, indent=4)
if 'director' in json:
json['directors'] = json.pop('director').split(', ')
if 'year' in json and json['year']: json['year'] = int(json['year'])
return json
class Karagarga(models.Model):
movie_id = models.ForeignKey(MovieId, related_name='karagarga_ids', default=None)
karagarga_id = models.IntegerField(unique=True)
@classmethod
def maxId(cls):
return cls.objects.aggregate(Max('karagarga_id'))['karagarga_id__max']

15
oxdata/lookup/modules.py Normal file
View file

@ -0,0 +1,15 @@
class IdModules(object):
    """
    Registry of named callables that are invoked together by run().

    Each entry maps a name to a callable taking no arguments. The first
    registration for a name wins; later registrations under the same name
    are silently ignored.
    """

    def __init__(self):
        # name -> zero-argument callable
        self._registry = {}

    def register(self, name, getIds):
        """
        Register the callable getIds under name.

        Does nothing if name is already registered, so registering the same
        module more than once is harmless.
        """
        if name not in self._registry:
            self._registry[name] = getIds

    def run(self):
        """
        Call every registered callable once.

        Exceptions raised by a callable propagate to the caller and stop
        the run.
        """
        # Iterate over a snapshot: a callback that registers another module
        # while we are running would otherwise change the dict during
        # iteration and raise RuntimeError. Entries added during a run are
        # picked up by the next run.
        for name, getIds in list(self._registry.items()):
            getIds()


# Module-level registry instance used by callers (modules.getIds.run()).
getIds = IdModules()

View file

@ -7,8 +7,20 @@ from django.contrib.auth.models import User
from oxdjango.shortcuts import render_to_json_response
import models
def ids(request):
    """
    Look up a movie by an external id and return its data as JSON.

    Reads the optional GET parameters 'imdb' and 'criterion'. When both
    are given and both match, the criterion match is used because it is
    evaluated last. An empty JSON object is returned when no parameter is
    given or no movie is found.
    """
    json = {}
    movie = None
    if 'imdb' in request.GET:
        imdb_id = request.GET['imdb']
        movie = models.getMovieIdByImdbId(imdb_id)
    if 'criterion' in request.GET:
        criterion_id = request.GET['criterion']
        try:
            movie = models.MovieId.objects.get(criterion_id=criterion_id)
        except models.MovieId.DoesNotExist:
            # Unknown criterion id: do not fail the request. Keep whatever
            # the imdb lookup produced (possibly nothing), so the response
            # falls back to that movie or to empty JSON.
            pass
    if movie:
        # Refresh the record before serializing it.
        movie.updateFromImdb()
        json = movie.json()
    return render_to_json_response(json)
def urls(request):

View file

@ -1,4 +1,13 @@
#!/usr/bin/env python
import os
root_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
os.chdir(root_dir)
#using virtualenv's activate_this.py to reorder sys.path
activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py')
execfile(activate_this, dict(__file__=activate_this))
from django.core.management import execute_manager
try:
import settings # Assumed to be in the same directory.

View file

@ -10,16 +10,15 @@ from django.core.files.base import ContentFile
import ox.web.criterion
import ox.web.movieposterdb
import ox.web.karagarga
import ox.web.imdb
import ox.web.impawards
from oxdata.lookup.models import MovieId, Karagarga
from oxdata.lookup.models import MovieId
def getPosters(movie_id):
getPosterUrls(movie_id)
posters = {}
for p in PosterCache.objects.all().filter(movie_id=movie_id):
for p in PosterCache.objects.all().filter(movie_id=movie_id).order_by('id'):
if p.site not in posters:
posters[p.site] = []
poster = p.get()
@ -28,8 +27,9 @@ def getPosters(movie_id):
pjson['url'] = poster._get_url()
pjson['width'] = poster.width
pjson['height'] = poster.height
if poster.width < poster.height:
posters[p.site].append(pjson)
for p in posters:
for p in posters.keys():
if not posters[p]:
del posters[p]
return posters
@ -50,17 +50,28 @@ class PosterCache(models.Model):
url = models.CharField(max_length=1024)
site = models.CharField(max_length=255)
site_id = models.CharField(max_length=42)
image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url, f))
image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url.encode('utf-8'), f))
status = models.CharField(max_length=1024, default='200')
failed = models.BooleanField(default=False)
def get(self):
if not self.image and not self.failed:
try:
import ox.net
name = hashlib.sha1(self.url).hexdigest()
data = ox.net.readUrl(self.url)
url = self.url.encode('utf-8')
name = hashlib.sha1(url).hexdigest()
try:
data = ox.net.readUrl(url)
self.image.save(name, ContentFile(data))
except:
except ox.net.urllib2.HTTPError, e:
import traceback
print traceback.print_exc()
self.status = e.code
self.failed = True
self.save()
except ox.net.urllib2.URLError, e:
import traceback
print traceback.print_exc()
self.status = e.reason
self.failed = True
self.save()
return self.image
@ -110,13 +121,6 @@ def getPosterUrls(m):
for poster in data['posters']:
addPoster(poster, 'impawards.com', m.imdb_id)
for kg in Karagarga.objects.all().filter(movie_id=m):
data = ox.web.karagarga.getData(kg.karagarga_id)
if data:
for poster in data['posters']:
addPoster(poster, 'karagarga.net', kg.karagarga_id)
else:
kg.delete()
#fixme: get 0xdb still, possibly use kg or imdb still as fallback?
#fixme: get 0xdb still, possibly imdb still as fallback?

View file

@ -93,6 +93,7 @@ INSTALLED_APPS = (
LOGIN_REDIRECT_URL='/'
#overwrite default settings with local settings
try:
from local_settings import *