add some cache data

2009-07-13 10:09:58 +02:00 · 2009-07-13 10:09:58 +02:00 · 294dbc8bac
commit 294dbc8bac
parent 86ab85024d
11 changed files with 279 additions and 9 deletions
--- a/criterion/cache.py
+++ b/criterion/cache.py
@ -0,0 +1,55 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.criterion
+
+from oxdata.lookup.models import IdMapping
+
+
+def getPoster(id, url=None):
+    """Return the local path of the cached criterion.com poster for `id`.
+
+    The poster is stored as DATA_ROOT/criterion.com/<id>/poster.jpg. If that
+    file does not exist it is downloaded from `url`; when `url` is empty, the
+    'posterUrl' entry of oxweb.criterion.getData(id) is used instead.
+
+    Returns the normalized filename whether or not a download happened.
+    """
+    dirname = os.path.join(settings.DATA_ROOT, 'criterion.com', id)
+    filename = os.path.normpath(os.path.join(dirname, 'poster.jpg'))
+    if not os.path.exists(filename):
+        if not url:
+            url = oxweb.criterion.getData(id)['posterUrl']
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # Poster data is binary: 'wb' prevents newline translation on
+        # platforms that distinguish text and binary files.
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            # Release the handle even when the write fails.
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Create IdMapping rows and cache posters for unmapped criterion ids.
+
+    Iterates over oxweb.criterion.getIds(). Ids that already have an
+    IdMapping with that criterion_id are left untouched (no poster fetch).
+    `init` is accepted but not used by this function.
+    """
+    for criterionId in oxweb.criterion.getIds():
+        try:
+            m = IdMapping.objects.get(criterion_id=criterionId)
+        except IdMapping.DoesNotExist:
+            # No mapping for this criterion id yet: look up its data.
+            data = oxweb.criterion.getData(criterionId)
+            imdbId = data['imdbId']
+            if imdbId:
+                try:
+                    # Attach to an existing row that already has this imdb id.
+                    m = IdMapping.objects.get(imdb_id=imdbId)
+                except IdMapping.DoesNotExist:
+                    m = IdMapping()
+                    m.imdb_id = imdbId
+            else:
+                # No imdb id known: create a mapping with the criterion id only.
+                m = IdMapping()
+            m.criterion_id = criterionId
+            m.save()
+            # The poster is fetched only for ids that had no mapping before.
+            url = data['posterUrl']
+            getPoster(criterionId, url)
+
+def cron():
+    """Run archivePosters() with `init` left at its default (False)."""
+    archivePosters()
+
+def init():
+    """Run archivePosters(True), i.e. with the `init` flag set."""
+    archivePosters(True)
+
--- a/impawards/cache.py
+++ b/impawards/cache.py
@ -0,0 +1,53 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.impawards
+
+
+def getPosterFilename(id, url):
+    """Build the normalized cache path for an impawards.com poster.
+
+    The result is DATA_ROOT/impawards.com/<id[:1]>/<id[:4]>/<id>/<name>, where
+    <name> is the last path component of `url`. Nothing is created on disk.
+    """
+    dirname = os.path.join(settings.DATA_ROOT, 'impawards.com', id[:1], id[:4], id)
+    basename = os.path.basename(url)
+    return os.path.normpath(os.path.join(dirname, basename))
+
+def getPoster(id, url):
+    """Return the cached path of an impawards.com poster, fetching it if absent.
+
+    The target path comes from getPosterFilename(id, url). When no file
+    exists there, its directory is created as needed and the result of
+    oxlib.net.getUrl(url) is written to it.
+    """
+    filename = getPosterFilename(id, url)
+    if not os.path.exists(filename):
+        # getPosterFilename() only returns the full path, so the directory
+        # has to be derived from it here.
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # Poster data is binary: 'wb' prevents newline translation.
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            # Release the handle even when the write fails.
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Walk the impawards.com archive pages and cache the posters they list.
+
+    latest.html is handled first, followed by pageN.html with N counting down
+    to 1. Every movie page is parsed for 'posterUrl' and 'imdbId', and posters
+    missing on disk are stored via getPoster(). Unless `init` is true, the
+    whole walk stops at the first poster that already exists on disk.
+    """
+    latest = getUrlUnicode('http://impawards.com/archives/latest.html', timeout = 0)
+    pages = int(findRe(latest, '<a href = page(.*?).html>'))
+    for page in range(pages + 1, 0, -1):
+        if page > pages:
+            # page == pages + 1 stands for latest.html, already fetched above.
+            archive = latest
+        else:
+            archive = getUrlUnicode('http://impawards.com/archives/page%s.html' % page, timeout = -1)
+        for movieUrl in oxweb.impawards.parseArchivePage(archive):
+            movieHtml = getUrlUnicode(movieUrl, timeout = -1)
+            data = oxweb.impawards.parseMoviePage(movieHtml)
+            posterUrl = data['posterUrl']
+            imdbId = data['imdbId']
+            if os.path.exists(getPosterFilename(imdbId, posterUrl)):
+                if not init:
+                    return
+            else:
+                getPoster(imdbId, posterUrl)
+
+def cron():
+    """Run archivePosters() with `init` left at its default (False)."""
+    archivePosters()
+
+def init():
+    """Run archivePosters(True), i.e. with the `init` flag set."""
+    archivePosters(True)
+
--- a/karagarga/cache.py
+++ b/karagarga/cache.py
@ -0,0 +1,39 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+import hashlib
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.movieposterdb
+
+
+def getPosterFilename(id, url):
+    """Build the normalized cache path for a karagarga.net poster.
+
+    The result is DATA_ROOT/karagarga.net/<first char of id>/<id>/<sha1>.jpg,
+    where <sha1> is the hex SHA-1 digest of `url` and `id` is converted with
+    str() first. Nothing is created on disk.
+    """
+    key = str(id)
+    dirname = os.path.join(settings.DATA_ROOT, 'karagarga.net', key[:1], key)
+    basename = hashlib.sha1(url).hexdigest() + '.jpg'
+    return os.path.normpath(os.path.join(dirname, basename))
+
+def getPoster(id, url):
+    """Return the cached path of a karagarga.net poster, fetching it if absent.
+
+    The target path comes from getPosterFilename(id, url). When no file
+    exists there, its directory is created as needed and the result of
+    oxlib.net.getUrl(url) is written to it.
+    """
+    filename = getPosterFilename(id, url)
+    if not os.path.exists(filename):
+        # getPosterFilename() only returns the full path, so the directory
+        # has to be derived from it here.
+        dirname = os.path.dirname(filename)
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # Poster data is binary: 'wb' prevents newline translation.
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            # Release the handle even when the write fails.
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Do nothing and return None; `init` is accepted but not used."""
+    return
+
+def cron():
+    """Run archivePosters() with `init` left at its default (False)."""
+    archivePosters()
+
+def init():
+    """Run archivePosters(True), i.e. with the `init` flag set."""
+    archivePosters(True)
+
--- a/lookup/init.py
+++ b/lookup/init.py
--- a/lookup/models.py
+++ b/lookup/models.py
@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os.path
+from django.db import models
+from django.db.models import Q
+from django.contrib.auth.models import User
+
+import simplejson
+import oxweb.wikipedia
+
+
+class IdMapping(models.Model):
+    """One row per title, holding that title's ids/urls on external services.
+
+    Every id field is unique and nullable, so a row may know any subset of
+    the services.
+    """
+    # Timestamps maintained by Django on insert / on every save.
+    created = models.DateTimeField(auto_now_add=True)
+    modified = models.DateTimeField(auto_now=True)
+
+    oxdb_id = models.CharField(max_length=42, unique=True, blank=True, null=True,  default=None)
+    imdb_id = models.CharField(max_length=7, unique=True, blank=True, null=True,  default=None)
+
+    amg_id = models.IntegerField(unique=True, blank=True, null=True,  default=None)
+    wikipedia_url = models.CharField(unique=True, max_length=255, blank=True, null=True,  default=None)
+    criterion_id = models.IntegerField(unique=True, blank=True, null=True,  default=None)
+    movieposterdb_url = models.CharField(max_length=255, unique=True, blank=True, null=True,  default=None)
+    impawards_url = models.CharField(max_length=255, unique=True, blank=True, null=True,  default=None)
+    rottentomatoes_id = models.CharField(max_length=255, unique=True, blank=True, null=True,  default=None)
+
+    #FIXME: look into other ids
+    #what about tv.com ids/urls for tv episodes
+    kg_id = models.IntegerField(unique=True, blank=True, null=True,  default=None)
+
+
+    def __unicode__(self):
+        """Return the imdb id, or an empty string when it is not set."""
+        # imdb_id is nullable; __unicode__ must not return None.
+        return self.imdb_id or u''
+
+    def updateFromWikipedia(self):
+        """Fill in ids from oxweb.wikipedia.getMovieData(wikipedia_url).
+
+        Does nothing when wikipedia_url is empty. Otherwise copies every
+        non-empty 'imdb_id', 'amg_id' and 'rottentomatoes_id' value found in
+        the returned data onto this row and saves it.
+        """
+        if self.wikipedia_url:
+            data = oxweb.wikipedia.getMovieData(self.wikipedia_url)
+            # Optional remapping from data key to attribute name; currently
+            # empty, so every key maps to the attribute of the same name.
+            _key = {}
+            for key in ('imdb_id',
+                        'amg_id',
+                        'rottentomatoes_id'):
+                if key in data and data[key]:
+                    setattr(self, _key.get(key, key), data[key])
+            self.save()
+
+    def json(self):
+        """Return the non-empty id fields as a JSON object string (indent=4).
+
+        Only the fields listed below are exported; criterion_id and kg_id
+        are not part of the output.
+        """
+        json = {}
+        for key in ('imdb_id',
+                    'amg_id',
+                    'oxdb_id',
+                    'wikipedia_url',
+                    # The model field is movieposterdb_url; there is no
+                    # movieposterdb_id attribute to read.
+                    'movieposterdb_url',
+                    'impawards_url',
+                    'rottentomatoes_id'):
+            value = getattr(self, key)
+            if value:
+                json[key] = value
+        return simplejson.dumps(json, indent=4)
+
--- a/lookup/views.py
+++ b/lookup/views.py
@ -0,0 +1 @@
+# Create your views here.
--- a/movieposterdb/cache.py
+++ b/movieposterdb/cache.py
@ -0,0 +1,40 @@
+# -*- coding: UTF-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
+import os
+import hashlib
+from django.conf import settings
+from oxlib.cache import getUrlUnicode
+from oxlib import findRe
+import oxlib.net
+import oxweb.movieposterdb
+
+
+def getPoster(id, url=''):
+    """Return the cached path of a movieposterdb.com poster for `id`.
+
+    When `url` is empty, the first entry of
+    oxweb.movieposterdb.getPosterUrls(id) is used; if that list is empty the
+    function returns None. The poster is stored as
+    DATA_ROOT/movieposterdb.com/<id[:1]>/<id[:4]>/<id>/<sha1 of url>.jpg and
+    is downloaded only when that file does not exist yet.
+    """
+    if not url:
+        urls = oxweb.movieposterdb.getPosterUrls(id)
+        if not urls:
+            return None
+        url = urls[0]
+    dirname = os.path.join(settings.DATA_ROOT, 'movieposterdb.com', id[:1], id[:4], id)
+    url_hash = hashlib.sha1(url).hexdigest()
+    filename = os.path.normpath(os.path.join(dirname, '%s.jpg' % url_hash))
+    if not os.path.exists(filename):
+        if not os.path.exists(dirname):
+            os.makedirs(dirname)
+        data = oxlib.net.getUrl(url)
+        # Poster data is binary: 'wb' prevents newline translation.
+        f = open(filename, 'wb')
+        try:
+            f.write(data)
+        finally:
+            # Release the handle even when the write fails.
+            f.close()
+    return filename
+
+def archivePosters(init=False):
+    """Do nothing and return None; `init` is accepted but not used."""
+    return
+
+def cron():
+    """Run archivePosters() with `init` left at its default (False)."""
+    archivePosters()
+
+def init():
+    """Run archivePosters(True), i.e. with the `init` flag set."""
+    archivePosters(True)
+
--- a/poster/init.py
+++ b/poster/init.py
--- a/poster/models.py
+++ b/poster/models.py
@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
--- a/poster/views.py
+++ b/poster/views.py
@ -0,0 +1 @@
+# Create your views here.
--- a/settings.py
+++ b/settings.py
@ -1,4 +1,10 @@
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=4:sts=4:ts=4
 # Django settings for oxdata project.
+import os
+from os.path import join
+
+PROJECT_PATH = os.path.dirname(__file__)

 DEBUG = True
 TEMPLATE_DEBUG = DEBUG
@ -9,8 +15,8 @@ ADMINS = (

 MANAGERS = ADMINS

-DATABASE_ENGINE = ''           # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
-DATABASE_NAME = ''             # Or path to database file if using sqlite3.
+DATABASE_ENGINE = 'sqlite3'           # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
+DATABASE_NAME = 'dev.sqlite'             # Or path to database file if using sqlite3.
 DATABASE_USER = ''             # Not used with sqlite3.
 DATABASE_PASSWORD = ''         # Not used with sqlite3.
 DATABASE_HOST = ''             # Set to empty string for localhost. Not used with sqlite3.
@ -21,7 +27,7 @@ DATABASE_PORT = ''             # Set to empty string for default. Not used with
 # although not all choices may be available on all operating systems.
 # If running in a Windows environment this must be set to the same as your
 # system time zone.
-TIME_ZONE = 'America/Chicago'
+TIME_ZONE = 'Europe/Berlin'

 # Language code for this installation. All choices can be found here:
 # http://www.i18nguy.com/unicode/language-identifiers.html
@ -35,17 +41,20 @@ USE_I18N = True

 # Absolute path to the directory that holds media.
 # Example: "/home/media/media.lawrence.com/"
-MEDIA_ROOT = ''
+MEDIA_ROOT = join(PROJECT_PATH, 'media')
+STATIC_ROOT = join(PROJECT_PATH, 'static')
+DATA_ROOT = join(PROJECT_PATH, 'data')
+

 # URL that handles the media served from MEDIA_ROOT. Make sure to use a
 # trailing slash if there is a path component (optional in other cases).
 # Examples: "http://media.lawrence.com", "http://example.com/media/"
-MEDIA_URL = ''
+MEDIA_URL = '/media/'

 # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
 # trailing slash.
 # Examples: "http://foo.com/media/", "/media/".
-ADMIN_MEDIA_PREFIX = '/media/'
+ADMIN_MEDIA_PREFIX = '/admin/media/'

 # Make this unique, and don't share it with anybody.
 SECRET_KEY = '8n+5je$*h3d++v)o65oji)eq6ufm*z6(_i(z7gsu+eyp47d+24'
@ -66,9 +75,7 @@ MIDDLEWARE_CLASSES = (
 ROOT_URLCONF = 'oxdata.urls'

 TEMPLATE_DIRS = (
-    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
-    # Always use forward slashes, even on Windows.
-    # Don't forget to use absolute paths, not relative paths.
+    join(PROJECT_PATH, 'templates'),
 )

 INSTALLED_APPS = (
@ -76,4 +83,16 @@ INSTALLED_APPS = (
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.sites',
+    'django.contrib.humanize',
+    'oxdata.criterion',
+    'oxdata.impawards',
+    'oxdata.karagarga',
+    'oxdata.impawards',
+    'oxdata.movieposterdb',
+
+    'oxdata.lookup',
+    'oxdata.poster',
 )
+
+LOGIN_REDIRECT_URL='/'
+