diff --git a/oxdata/lookup/cache.py b/oxdata/lookup/cache.py index 1cc0423..c30550e 100644 --- a/oxdata/lookup/cache.py +++ b/oxdata/lookup/cache.py @@ -6,12 +6,12 @@ from django.conf import settings from ox.cache import readUrlUnicode from ox import findRe import ox.web.criterion -import ox.web.karagarga import ox.web.imdb import ox.web.impawards import models from oxdata.poster.models import PosterCache +import modules def addPoster(m, url, site, site_id): if PosterCache.objects.all().filter(url=url).count() == 0: @@ -21,15 +21,16 @@ def addPoster(m, url, site, site_id): def getIds(): for id in ox.web.impawards.getIds(): if models.MovieId.objects.all().filter(impawards_id=id).count() == 0: - print 'impawards', id + print 'impawards', ox.web.impawards.getUrl(id) data = ox.web.impawards.getData(id) if data and 'imdbId' in data: m = models.getMovieIdByImdbId(data['imdbId']) - if not m.impawards_id: - m.impawards_id = id - m.save() - for poster in data['posters']: - addPoster(m, poster, 'impawards.com', m.imdb_id) + if m: + if not m.impawards_id: + m.impawards_id = id + m.save() + for poster in data['posters']: + addPoster(m, poster, 'impawards.com', m.imdb_id) for id in ox.web.criterion.getIds(): if models.MovieId.objects.all().filter(criterion_id=id).count() == 0: @@ -44,18 +45,5 @@ def getIds(): else: print data['title'], "no imdbId" - #kg - lastId = models.Karagarga.maxId() - for id in ox.web.karagarga.getIds(lastId): - if models.Karagarga.objects.filter(karagarga_id=id).count() == 0: - print 'kg', id - data = ox.web.karagarga.getData(id) - if data and 'imdbId' in data: - m = models.getMovieIdByImdbId(data['imdbId']) - kg = models.Karagarga() - kg.movie_id = m - kg.karagarga_id = id - kg.save() - for poster in data['posters']: - addPoster(m, poster, 'karagarga.net', kg.karagarga_id) + modules.getIds.run() diff --git a/oxdata/lookup/models.py b/oxdata/lookup/models.py index b42505d..1f582ee 100644 --- a/oxdata/lookup/models.py +++ b/oxdata/lookup/models.py @@ -6,22 +6,31 @@ import hashlib from django.db import models from django.db.models import Q, Max from django.contrib.auth.models import User -from django.utils import simplejson +import ox +from ox import stripTags import ox.web.imdb import ox.web.wikipedia -from ox import stripTags - def getMovieIdByImdbId(imdb_id): + #movies moved in imdb + imdb_id = { + '0377059': '0343663', + '0426560': '0088000', + }.get(imdb_id, imdb_id) try: m = MovieId.objects.get(imdb_id=imdb_id) except MovieId.DoesNotExist: - m = MovieId() - m.imdb_id = imdb_id - m.save() - #m.updateFromImdb() + #check if imdb_id actually exists on imdb + if ox.cache.exists('http://www.imdb.com/title/tt%s/combined'%imdb_id): + m = MovieId() + m.imdb_id = imdb_id + m.save() + m.updateFromImdb() + else: + print imdb_id, "is not a valid id, failed loading movie" + return None return m class MovieId(models.Model): @@ -117,13 +126,9 @@ class MovieId(models.Model): value = getattr(self, key) if value: json[key] = value - return simplejson.dumps(json, indent=4) + if 'director' in json: + json['directors'] = json.pop('director').split(', ') + if 'year' in json and json['year']: json['year'] = int(json['year']) + return json -class Karagarga(models.Model): - movie_id = models.ForeignKey(MovieId, related_name='karagarga_ids', default=None) - karagarga_id = models.IntegerField(unique=True) - @classmethod - def maxId(cls): - return cls.objects.aggregate(Max('karagarga_id'))['karagarga_id__max'] - diff --git a/oxdata/lookup/modules.py b/oxdata/lookup/modules.py new file mode 100644 index 0000000..c63a920 --- /dev/null +++ b/oxdata/lookup/modules.py @@ -0,0 +1,15 @@ + +class IdModules(object): + def __init__(self): + self._registry = {} + + def register(self, name, getIds): + if name not in self._registry: + self._registry[name] = getIds + + def run(self): + for name in self._registry: + self._registry[name]() + +getIds = IdModules() + diff --git a/oxdata/lookup/views.py b/oxdata/lookup/views.py index 75f42ae..b76095b 100644 --- a/oxdata/lookup/views.py +++ b/oxdata/lookup/views.py @@ -7,8 +7,20 @@ from django.contrib.auth.models import User from oxdjango.shortcuts import render_to_json_response +import models + def ids(request): json = {} + movie = None + if 'imdb' in request.GET: + imdb_id = request.GET['imdb'] + movie = models.getMovieIdByImdbId(imdb_id) + if 'criterion' in request.GET: + criterion_id = request.GET['criterion'] + movie = models.MovieId.objects.get(criterion_id=criterion_id) + if movie: + movie.updateFromImdb() + json = movie.json() return render_to_json_response(json) def urls(request): diff --git a/oxdata/manage.py b/oxdata/manage.py index 5e78ea9..b9fbfbf 100644 --- a/oxdata/manage.py +++ b/oxdata/manage.py @@ -1,4 +1,13 @@ #!/usr/bin/env python +import os + +root_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__))) +os.chdir(root_dir) + +#using virtualenv's activate_this.py to reorder sys.path +activate_this = os.path.join(root_dir, '..', 'bin', 'activate_this.py') +execfile(activate_this, dict(__file__=activate_this)) + from django.core.management import execute_manager try: import settings # Assumed to be in the same directory. diff --git a/oxdata/poster/models.py b/oxdata/poster/models.py index cdc3528..4ea6b53 100644 --- a/oxdata/poster/models.py +++ b/oxdata/poster/models.py @@ -10,16 +10,15 @@ from django.core.files.base import ContentFile import ox.web.criterion import ox.web.movieposterdb -import ox.web.karagarga import ox.web.imdb import ox.web.impawards -from oxdata.lookup.models import MovieId, Karagarga +from oxdata.lookup.models import MovieId def getPosters(movie_id): getPosterUrls(movie_id) posters = {} - for p in PosterCache.objects.all().filter(movie_id=movie_id): + for p in PosterCache.objects.all().filter(movie_id=movie_id).order_by('id'): if p.site not in posters: posters[p.site] = [] poster = p.get() @@ -28,8 +27,9 @@ def getPosters(movie_id): pjson['url'] = poster._get_url() pjson['width'] = poster.width pjson['height'] = poster.height - posters[p.site].append(pjson) - for p in posters: + if poster.width < poster.height: + posters[p.site].append(pjson) + for p in posters.keys(): if not posters[p]: del posters[p] return posters @@ -50,17 +50,28 @@ class PosterCache(models.Model): url = models.CharField(max_length=1024) site = models.CharField(max_length=255) site_id = models.CharField(max_length=42) - image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url, f)) + image = models.ImageField(max_length=255, upload_to=lambda i, f: poster_path(i.url.encode('utf-8'), f)) + status = models.CharField(max_length=1024, default='200') failed = models.BooleanField(default=False) def get(self): if not self.image and not self.failed: + import ox.net + url = self.url.encode('utf-8') + name = hashlib.sha1(url).hexdigest() try: - import ox.net - name = hashlib.sha1(self.url).hexdigest() - data = ox.net.readUrl(self.url) + data = ox.net.readUrl(url) self.image.save(name, ContentFile(data)) - except: + except ox.net.urllib2.HTTPError, e: + import traceback + print traceback.print_exc() + self.status = e.code + self.failed = True + self.save() + except ox.net.urllib2.URLError, e: + import traceback + print traceback.print_exc() + self.status = e.reason self.failed = True self.save() return self.image @@ -110,13 +121,6 @@ def getPosterUrls(m): for poster in data['posters']: addPoster(poster, 'impawards.com', m.imdb_id) - for kg in Karagarga.objects.all().filter(movie_id=m): - data = ox.web.karagarga.getData(kg.karagarga_id) - if data: - for poster in data['posters']: - addPoster(poster, 'karagarga.net', kg.karagarga_id) - else: - kg.delete() - #fixme: get 0xdb still, possibly use kg or imdb still as fallback? + #fixme: get 0xdb still, possibly imdb still as fallback? diff --git a/oxdata/settings.py b/oxdata/settings.py index c520dcc..90307ff 100644 --- a/oxdata/settings.py +++ b/oxdata/settings.py @@ -93,6 +93,7 @@ INSTALLED_APPS = ( LOGIN_REDIRECT_URL='/' + #overwrite default settings with local settings try: from local_settings import *