diff --git a/criterion/cache.py b/criterion/cache.py
new file mode 100644
index 0000000..91c670c
--- /dev/null
+++ b/criterion/cache.py
@@ -0,0 +1,75 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.criterion
+
+from oxdata.lookup.models import IdMapping
+
+
+def getPoster(id, url=None):
+    """Return the path of the cached poster for a criterion id.
+
+    The poster is stored as DATA_ROOT/criterion.com/ID/poster.jpg. If the
+    file is missing it is downloaded from url; without a url the address
+    is taken from oxweb.criterion.getData(id)['posterUrl'].
+    """
+    # id may be an integer (IdMapping.criterion_id is an IntegerField),
+    # os.path.join only takes strings
+    dirname = os.path.join(settings.DATA_ROOT, 'criterion.com', str(id))
+    filename = os.path.normpath(os.path.join(dirname, 'poster.jpg'))
+    if not os.path.exists(filename):
+        if not url:
+            url = oxweb.criterion.getData(id)['posterUrl']
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # posters are binary data, do not write them in text mode
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Map all criterion ids in IdMapping and cache their posters.
+
+    For ids without an IdMapping, the mapping with the same imdb id is
+    reused if there is one, otherwise a new mapping is created.
+    init is not used by this module.
+    """
+    for criterionId in oxweb.criterion.getIds():
+        # stays None for known ids, getPoster then looks the url up
+        # itself if the poster is not on disk yet
+        url = None
+        try:
+            m = IdMapping.objects.get(criterion_id=criterionId)
+        except IdMapping.DoesNotExist:
+            data = oxweb.criterion.getData(criterionId)
+            url = data['posterUrl']
+            imdbId = data['imdbId']
+            if imdbId:
+                try:
+                    m = IdMapping.objects.get(imdb_id=imdbId)
+                except IdMapping.DoesNotExist:
+                    m = IdMapping()
+                    m.imdb_id = imdbId
+            else:
+                m = IdMapping()
+            m.criterion_id = criterionId
+            m.save()
+        getPoster(criterionId, url)
+
+def cron():
+    """Run archivePosters() with its defaults."""
+    archivePosters()
+
+def init():
+    """Run archivePosters() with init=True."""
+    archivePosters(True)
+
diff --git a/impawards/cache.py b/impawards/cache.py
new file mode 100644
index 0000000..05348f6
--- /dev/null
+++ b/impawards/cache.py
@@ -0,0 +1,75 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.impawards
+
+
+def getPosterFilename(id, url):
+    """Return the local path under which the poster at url is cached.
+
+    The path is DATA_ROOT/impawards.com/id[:1]/id[:4]/id/NAME,
+    where NAME is the last path component of url.
+    """
+    dirname = os.path.join(settings.DATA_ROOT, 'impawards.com', id[:1], id[:4], id)
+    filename = os.path.join(dirname, os.path.split(url)[1])
+    return os.path.normpath(filename)
+
+def getPoster(id, url):
+    """Return the local path of the poster at url, downloading it if missing."""
+    filename = getPosterFilename(id, url)
+    if not os.path.exists(filename):
+        # directory part of the poster path, created on demand
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # posters are binary data, do not write them in text mode
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Cache the posters of the movies linked from the impawards archive.
+
+    Pages are processed from latest.html down to page1.html. Unless init
+    is set, processing stops at the first poster that is already cached.
+    """
+    html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
+    # NOTE(review): the pattern passed to findRe is empty; presumably it
+    # should extract the number of archive pages from latest.html - confirm
+    pages = int(findRe(html, ''))
+    for page in range(pages + 1, 0, -1):
+        # the first pass (pages + 1) uses latest.html, loaded above
+        if page <= pages:
+            html = getUrlUnicode('http://impawards.com/archives/page%s.html' % page, timeout = -1)
+        urls = oxweb.impawards.parseArchivePage(html)
+        for url in urls:
+            html = getUrlUnicode(url, timeout = -1)
+            data = oxweb.impawards.parseMoviePage(html)
+            url = data['posterUrl']
+            imdbId = data['imdbId']
+            if not imdbId or not url:
+                # nothing to file the poster under, skip this movie
+                continue
+            filename = getPosterFilename(imdbId, url)
+            if not os.path.exists(filename):
+                getPoster(imdbId, url)
+            elif not init:
+                return
+
+def cron():
+    """Run archivePosters() with its defaults."""
+    archivePosters()
+
+def init():
+    """Run archivePosters() with init=True."""
+    archivePosters(True)
+
diff --git a/karagarga/cache.py b/karagarga/cache.py
new file mode 100644
index 0000000..9608786
--- /dev/null
+++ b/karagarga/cache.py
@@ -0,0 +1,51 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+import hashlib
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.movieposterdb
+
+
+def getPosterFilename(id, url):
+    """Return the local path under which the poster at url is cached.
+
+    The path is DATA_ROOT/karagarga.net/id[:1]/id/SHA1.jpg, where SHA1
+    is the hex sha1 digest of url.
+    """
+    id = str(id)
+    dirname = os.path.join(settings.DATA_ROOT, 'karagarga.net', id[:1], id)
+    url_hash = hashlib.sha1(url).hexdigest()
+    filename = os.path.join(dirname, '%s.jpg' % url_hash)
+    return os.path.normpath(filename)
+
+def getPoster(id, url):
+    """Return the local path of the poster at url, downloading it if missing."""
+    filename = getPosterFilename(id, url)
+    if not os.path.exists(filename):
+        # directory part of the poster path, created on demand
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # posters are binary data, do not write them in text mode
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Does nothing, init is ignored."""
+    return
+
+def cron():
+    """Run archivePosters() with its defaults."""
+    archivePosters()
+
+def init():
+    """Run archivePosters() with init=True."""
+    archivePosters(True)
+
diff --git a/lookup/__init__.py b/lookup/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lookup/models.py b/lookup/models.py
new file mode 100644
index 0000000..5aca961
--- /dev/null
+++ b/lookup/models.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os.path
+from django.db import models
+from django.db.models import Q
+from django.contrib.auth.models import User
+
+import simplejson
+import oxweb.wikipedia
+
+
+class IdMapping(models.Model):
+    """Maps the ids/urls under which one movie is known on different sites.
+
+    All id fields are optional, each value may only be used by one mapping.
+    """
+    created = models.DateTimeField(auto_now_add=True)
+    modified = models.DateTimeField(auto_now=True)
+
+    oxdb_id = models.CharField(max_length=42, unique=True, blank=True, null=True, default=None)
+    imdb_id = models.CharField(max_length=7, unique=True, blank=True, null=True, default=None)
+
+    amg_id = models.IntegerField(unique=True, blank=True, null=True, default=None)
+    wikipedia_url = models.CharField(unique=True, max_length=255, blank=True, null=True, default=None)
+    criterion_id = models.IntegerField(unique=True, blank=True, null=True, default=None)
+    movieposterdb_url = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
+    impawards_url = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
+    rottentomatoes_id = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
+
+    #FIXME: look into other ids
+    #what about tv.com ids/urls for tv episodes
+    kg_id = models.IntegerField(unique=True, blank=True, null=True, default=None)
+
+
+    def __unicode__(self):
+        # imdb_id is nullable, __unicode__ has to return a string
+        return self.imdb_id or u''
+
+    def updateFromWikipedia(self):
+        """Copy ids from the movie's wikipedia data into this mapping and save.
+
+        Does nothing if wikipedia_url is not set. Only non-empty values
+        returned by oxweb.wikipedia.getMovieData() are copied.
+        """
+        if self.wikipedia_url:
+            data = oxweb.wikipedia.getMovieData(self.wikipedia_url)
+            for key in ('imdb_id',
+                        'amg_id',
+                        'rottentomatoes_id'):
+                if key in data and data[key]:
+                    setattr(self, key, data[key])
+            self.save()
+
+    def json(self):
+        """Return the ids that are set as a JSON object string (indent 4)."""
+        json = {}
+        for key in ('imdb_id',
+                    'amg_id',
+                    'oxdb_id',
+                    'wikipedia_url',
+                    'movieposterdb_url',
+                    'impawards_url',
+                    'rottentomatoes_id'):
+            value = getattr(self, key)
+            if value:
+                json[key] = value
+        return simplejson.dumps(json, indent=4)
+
diff --git a/lookup/views.py b/lookup/views.py
new file mode 100644
index 0000000..60f00ef
--- /dev/null
+++ b/lookup/views.py
@@ -0,0 +1 @@
+# Create your views here.
diff --git a/movieposterdb/cache.py b/movieposterdb/cache.py
new file mode 100644
index 0000000..631e70d
--- /dev/null
+++ b/movieposterdb/cache.py
@@ -0,0 +1,50 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+import hashlib
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.movieposterdb
+
+
+def getPoster(id, url=''):
+    """Return the local path of a poster for id, downloading it if missing.
+
+    Without a url the first entry of oxweb.movieposterdb.getPosterUrls(id)
+    is used; None is returned if there is none. The poster is stored as
+    DATA_ROOT/movieposterdb.com/id[:1]/id[:4]/id/SHA1.jpg, where SHA1 is
+    the hex sha1 digest of url.
+    """
+    if not url:
+        urls = oxweb.movieposterdb.getPosterUrls(id)
+        if not urls:
+            return None
+        url = urls[0]
+    dirname = os.path.join(settings.DATA_ROOT, 'movieposterdb.com', id[:1], id[:4], id)
+    url_hash = hashlib.sha1(url).hexdigest()
+    filename = os.path.normpath(os.path.join(dirname, '%s.jpg' % url_hash))
+    if not os.path.exists(filename):
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # posters are binary data, do not write them in text mode
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Does nothing, init is ignored."""
+    return
+
+def cron():
+    """Run archivePosters() with its defaults."""
+    archivePosters()
+
+def init():
+    """Run archivePosters() with init=True."""
+    archivePosters(True)
+
diff --git a/poster/__init__.py b/poster/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/poster/models.py b/poster/models.py
new file mode 100644
index 0000000..71a8362
--- /dev/null
+++ b/poster/models.py
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
diff --git a/poster/views.py b/poster/views.py
new file mode 100644
index 0000000..60f00ef
--- /dev/null
+++ b/poster/views.py
@@ -0,0 +1 @@
+# Create your views here.
diff --git a/settings.py b/settings.py
index 329e167..4f3d1b7 100644
--- a/settings.py
+++ b/settings.py
@@ -1,4 +1,10 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
 # Django settings for oxdata project.
+import os
+from os.path import join
+
+PROJECT_PATH = os.path.abspath(os.path.dirname(__file__))
 
 DEBUG = True
 TEMPLATE_DEBUG = DEBUG
@@ -9,8 +15,8 @@ ADMINS = (
 
 MANAGERS = ADMINS
 
-DATABASE_ENGINE = '' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
-DATABASE_NAME = '' # Or path to database file if using sqlite3.
+DATABASE_ENGINE = 'sqlite3' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
+DATABASE_NAME = join(PROJECT_PATH, 'dev.sqlite') # Or path to database file if using sqlite3.
 DATABASE_USER = '' # Not used with sqlite3.
 DATABASE_PASSWORD = '' # Not used with sqlite3.
 DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3.
@@ -21,7 +27,7 @@ DATABASE_PORT = '' # Set to empty string for default. Not used with
 # although not all choices may be available on all operating systems.
 # If running in a Windows environment this must be set to the same as your
 # system time zone.
-TIME_ZONE = 'America/Chicago'
+TIME_ZONE = 'Europe/Berlin'
 
 # Language code for this installation. All choices can be found here:
 # http://www.i18nguy.com/unicode/language-identifiers.html
@@ -35,17 +41,20 @@ USE_I18N = True
 
 # Absolute path to the directory that holds media.
 # Example: "/home/media/media.lawrence.com/"
-MEDIA_ROOT = ''
+MEDIA_ROOT = join(PROJECT_PATH, 'media')
+STATIC_ROOT = join(PROJECT_PATH, 'static')
+DATA_ROOT = join(PROJECT_PATH, 'data')
+
 
 # URL that handles the media served from MEDIA_ROOT. Make sure to use a
 # trailing slash if there is a path component (optional in other cases).
 # Examples: "http://media.lawrence.com", "http://example.com/media/"
-MEDIA_URL = ''
+MEDIA_URL = '/media/'
 
 # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
 # trailing slash.
 # Examples: "http://foo.com/media/", "/media/".
-ADMIN_MEDIA_PREFIX = '/media/'
+ADMIN_MEDIA_PREFIX = '/admin/media/'
 
 # Make this unique, and don't share it with anybody.
 SECRET_KEY = '8n+5je$*h3d++v)o65oji)eq6ufm*z6(_i(z7gsu+eyp47d+24'
@@ -66,9 +75,7 @@ MIDDLEWARE_CLASSES = (
 ROOT_URLCONF = 'oxdata.urls'
 
 TEMPLATE_DIRS = (
-    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
-    # Always use forward slashes, even on Windows.
-    # Don't forget to use absolute paths, not relative paths.
+    join(PROJECT_PATH, 'templates'),
 )
 
 INSTALLED_APPS = (
@@ -76,4 +83,15 @@ INSTALLED_APPS = (
     'django.contrib.contenttypes',
     'django.contrib.sessions',
     'django.contrib.sites',
+    'django.contrib.humanize',
+    'oxdata.criterion',
+    'oxdata.impawards',
+    'oxdata.karagarga',
+    'oxdata.movieposterdb',
+
+    'oxdata.lookup',
+    'oxdata.poster',
 )
+
+LOGIN_REDIRECT_URL='/'
+