add some cache data

This commit is contained in:
j 2009-07-13 10:09:58 +02:00
parent 86ab85024d
commit 294dbc8bac
11 changed files with 279 additions and 9 deletions

55
criterion/cache.py Normal file
View file

@ -0,0 +1,55 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os
from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.criterion
from oxdata.lookup.models import IdMapping
def getPoster(id, url=None):
    """Return the local path of the cached Criterion poster for ``id``.

    The poster is stored as ``<DATA_ROOT>/criterion.com/<id>/poster.jpg``.
    If that file does not exist yet it is downloaded first; when no ``url``
    is passed, the poster URL is taken from ``oxweb.criterion.getData(id)``.

    id  -- Criterion id, used as the name of the poster directory
    url -- poster URL (optional); only used when the file is not cached yet
    """
    dirname = os.path.join(settings.DATA_ROOT, 'criterion.com', id)
    filename = os.path.normpath(os.path.join(dirname, 'poster.jpg'))
    if not os.path.exists(filename):
        if not url:
            url = oxweb.criterion.getData(id)['posterUrl']
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # Fetch before opening the file so a failed download does not leave
        # an empty poster.jpg behind that would look like a cache hit.
        data = oxlib.net.getUrl(url)
        # Image data is binary: 'wb' avoids newline translation on Windows
        # and str/bytes errors on Python 3; 'with' closes the handle even
        # if the write fails.
        with open(filename, 'wb') as f:
            f.write(data)
    return filename
def archivePosters(init=False):
    """Make sure every Criterion id has an IdMapping row and a cached poster.

    For each id returned by ``oxweb.criterion.getIds()``:
    - if no IdMapping with that ``criterion_id`` exists, the Criterion data
      is fetched and the id is attached to the mapping with the same
      ``imdb_id`` (created if necessary), or to a fresh mapping when the
      data has no IMDb id;
    - the poster is cached through ``getPoster``.

    init -- accepted for parity with the other cache modules; not used here
    """
    for criterionId in oxweb.criterion.getIds():
        # Only known when the data had to be fetched below; getPoster()
        # resolves the URL itself when it gets None and the file is missing.
        # The original read data['posterUrl'] here unconditionally, which is
        # unbound (or left over from the previous id) for existing mappings.
        url = None
        try:
            IdMapping.objects.get(criterion_id=criterionId)
        except IdMapping.DoesNotExist:
            data = oxweb.criterion.getData(criterionId)
            url = data['posterUrl']
            imdbId = data['imdbId']
            if imdbId:
                try:
                    m = IdMapping.objects.get(imdb_id=imdbId)
                except IdMapping.DoesNotExist:
                    m = IdMapping()
                    m.imdb_id = imdbId
            else:
                m = IdMapping()
            m.criterion_id = criterionId
            m.save()
        getPoster(criterionId, url)
def cron():
    """Run archivePosters() in its default, non-init mode."""
    archivePosters(init=False)
def init():
    """Run archivePosters() with init=True."""
    archivePosters(init=True)

53
impawards/cache.py Normal file
View file

@ -0,0 +1,53 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os
from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.impawards
def getPosterFilename(id, url):
    """Return the normalized local path where the poster at ``url`` is kept.

    Layout: ``<DATA_ROOT>/impawards.com/<id[:1]>/<id[:4]>/<id>/<name>``,
    where ``<name>`` is the last path component of ``url``.
    """
    shard_dir = os.path.join(
        settings.DATA_ROOT, 'impawards.com', id[:1], id[:4], id)
    poster_name = os.path.basename(url)
    return os.path.normpath(os.path.join(shard_dir, poster_name))
def getPoster(id, url):
    """Download the poster at ``url`` unless it is cached; return its path.

    The target path comes from ``getPosterFilename(id, url)``. Missing
    parent directories are created before the file is written.

    id  -- id the poster is filed under (archivePosters passes the IMDb id)
    url -- URL of the poster image
    """
    filename = getPosterFilename(id, url)
    if not os.path.exists(filename):
        # The original referenced an undefined ``dirname`` here (NameError
        # on the first uncached poster); derive it from the target path.
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        data = oxlib.net.getUrl(url)
        # Binary mode for image data; 'with' closes the file on error too.
        with open(filename, 'wb') as f:
            f.write(data)
    return filename
def archivePosters(init=False):
    """Walk the impawards.com archive pages and cache the posters found.

    The page count is read from latest.html. Pages are visited from
    ``pages + 1`` down to 1; the first pass (``pages + 1``) has no page of
    its own and parses latest.html itself. Every movie page is parsed and
    its poster is downloaded if it is not on disk yet.

    init -- when false, the whole run stops at the first poster that is
            already cached; when true, cached posters are skipped and the
            walk continues through all pages.
    """
    # NOTE(review): timeout=0 / timeout=-1 are passed straight to
    # oxlib.cache.getUrlUnicode -- presumably "always refetch" and
    # "cache forever"; confirm against oxlib.cache.
    listing = getUrlUnicode('http://impawards.com/archives/latest.html', timeout=0)
    pages = int(findRe(listing, '<a href = page(.*?).html>'))
    for page in range(pages + 1, 0, -1):
        if page <= pages:
            listing = getUrlUnicode('http://impawards.com/archives/page%s.html' % page, timeout=-1)
        for movieUrl in oxweb.impawards.parseArchivePage(listing):
            movieHtml = getUrlUnicode(movieUrl, timeout=-1)
            data = oxweb.impawards.parseMoviePage(movieHtml)
            posterUrl = data['posterUrl']
            imdbId = data['imdbId']
            if os.path.exists(getPosterFilename(imdbId, posterUrl)):
                if not init:
                    return
            else:
                getPoster(imdbId, posterUrl)
def cron():
    """Run archivePosters() in its default, non-init mode."""
    archivePosters(init=False)
def init():
    """Run archivePosters() with init=True."""
    archivePosters(init=True)

39
karagarga/cache.py Normal file
View file

@ -0,0 +1,39 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os
import hashlib
from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.movieposterdb
def getPosterFilename(id, url):
    """Return the normalized local path for the poster of ``id`` at ``url``.

    Layout: ``<DATA_ROOT>/karagarga.net/<id[:1]>/<id>/<sha1 of url>.jpg``.

    id  -- karagarga id; converted with str(), so integers are accepted
    url -- poster URL; its SHA-1 hex digest becomes the file name
    """
    id = str(id)
    dirname = os.path.join(settings.DATA_ROOT, 'karagarga.net', id[:1], id)
    # sha1() needs a byte string: encode text URLs explicitly so non-ASCII
    # unicode URLs (Python 2) and str URLs (Python 3) do not raise. The
    # digest of plain ASCII URLs is unchanged.
    if not isinstance(url, bytes):
        url = url.encode('utf-8')
    url_hash = hashlib.sha1(url).hexdigest()
    filename = os.path.join(dirname, '%s.jpg' % url_hash)
    return os.path.normpath(filename)
def getPoster(id, url):
    """Download the poster at ``url`` unless it is cached; return its path.

    The target path comes from ``getPosterFilename(id, url)``. Missing
    parent directories are created before the file is written.

    id  -- karagarga id the poster is filed under
    url -- URL of the poster image
    """
    filename = getPosterFilename(id, url)
    if not os.path.exists(filename):
        # The original referenced an undefined ``dirname`` here (NameError
        # on the first uncached poster); derive it from the target path.
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        data = oxlib.net.getUrl(url)
        # Binary mode for image data; 'with' closes the file on error too.
        with open(filename, 'wb') as f:
            f.write(data)
    return filename
def archivePosters(init=False):
    """Placeholder: does nothing yet and returns None.

    init -- accepted so cron()/init() can call it; currently ignored
    """
    return None
def cron():
    """Run archivePosters() in its default, non-init mode."""
    archivePosters(init=False)
def init():
    """Run archivePosters() with init=True."""
    archivePosters(init=True)

0
lookup/__init__.py Normal file
View file

59
lookup/models.py Normal file
View file

@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os.path
from django.db import models
from django.db.models import Q
from django.contrib.auth.models import User
import simplejson
import oxweb.wikipedia
class IdMapping(models.Model):
    """Maps the ids/urls that one movie has on the different services.

    Every external identifier is optional (NULL by default) and unique, so
    a row can start with a single known id and be filled in later.
    """
    # Row bookkeeping, maintained by Django on insert / on every save.
    created = models.DateTimeField(auto_now_add=True)
    modified = models.DateTimeField(auto_now=True)

    oxdb_id = models.CharField(max_length=42, unique=True, blank=True, null=True, default=None)
    imdb_id = models.CharField(max_length=7, unique=True, blank=True, null=True, default=None)
    amg_id = models.IntegerField(unique=True, blank=True, null=True, default=None)
    wikipedia_url = models.CharField(unique=True, max_length=255, blank=True, null=True, default=None)
    criterion_id = models.IntegerField(unique=True, blank=True, null=True, default=None)
    movieposterdb_url = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
    impawards_url = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
    rottentomatoes_id = models.CharField(max_length=255, unique=True, blank=True, null=True, default=None)
    #FIXME: look into other ids
    #what about tv.com ids/urls for tv episodes
    kg_id = models.IntegerField(unique=True, blank=True, null=True, default=None)

    def __unicode__(self):
        """Return the IMDb id, or an empty string when none is set."""
        # imdb_id is nullable; __unicode__ must not return None.
        return self.imdb_id or u''

    def updateFromWikipedia(self):
        """Copy ids found on the movie's Wikipedia page onto this row.

        Does nothing when ``wikipedia_url`` is unset. Otherwise every
        non-empty ``imdb_id`` / ``amg_id`` / ``rottentomatoes_id`` value in
        ``oxweb.wikipedia.getMovieData(...)`` overwrites the local field
        and the row is saved.
        """
        if self.wikipedia_url:
            data = oxweb.wikipedia.getMovieData(self.wikipedia_url)
            # Optional rename table (wikipedia key -> model attribute);
            # empty because the names currently match one to one.
            _key = {}
            for key in ('imdb_id', 'amg_id', 'rottentomatoes_id'):
                if key in data and data[key]:
                    setattr(self, _key.get(key, key), data[key])
            self.save()

    def json(self):
        """Return the populated id fields as an indented JSON object string.

        Fields whose value is empty/None are left out.
        """
        # Was 'movieposterdb_id', which is not a field of this model and
        # made getattr() raise AttributeError on every call.
        keys = ('imdb_id',
                'amg_id',
                'oxdb_id',
                'wikipedia_url',
                'movieposterdb_url',
                'impawards_url',
                'rottentomatoes_id')
        values = {}
        for key in keys:
            value = getattr(self, key)
            if value:
                values[key] = value
        return simplejson.dumps(values, indent=4)

1
lookup/views.py Normal file
View file

@ -0,0 +1 @@
# Create your views here.

40
movieposterdb/cache.py Normal file
View file

@ -0,0 +1,40 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os
import hashlib
from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.movieposterdb
def getPoster(id, url=''):
    """Return the local path of a cached movieposterdb poster for ``id``.

    When ``url`` is empty, the first entry of
    ``oxweb.movieposterdb.getPosterUrls(id)`` is used; if there is none the
    function returns None. The poster is stored as
    ``<DATA_ROOT>/movieposterdb.com/<id[:1]>/<id[:4]>/<id>/<sha1 of url>.jpg``
    and downloaded only if that file does not exist yet.

    id  -- id string the poster is filed under
    url -- poster URL (optional)
    """
    if not url:
        urls = oxweb.movieposterdb.getPosterUrls(id)
        if not urls:
            return None
        url = urls[0]
    dirname = os.path.join(settings.DATA_ROOT, 'movieposterdb.com', id[:1], id[:4], id)
    # sha1() needs a byte string: encode text URLs explicitly so non-ASCII
    # unicode URLs (Python 2) and str URLs (Python 3) do not raise. The
    # digest of plain ASCII URLs is unchanged.
    url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
    url_hash = hashlib.sha1(url_bytes).hexdigest()
    filename = os.path.normpath(os.path.join(dirname, '%s.jpg' % url_hash))
    if not os.path.exists(filename):
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        data = oxlib.net.getUrl(url)
        # Binary mode for image data; 'with' closes the file on error too.
        with open(filename, 'wb') as f:
            f.write(data)
    return filename
def archivePosters(init=False):
    """Placeholder: does nothing yet and returns None.

    init -- accepted so cron()/init() can call it; currently ignored
    """
    return None
def cron():
    """Run archivePosters() in its default, non-init mode."""
    archivePosters(init=False)
def init():
    """Run archivePosters() with init=True."""
    archivePosters(init=True)

0
poster/__init__.py Normal file
View file

3
poster/models.py Normal file
View file

@ -0,0 +1,3 @@
from django.db import models
# Create your models here.

1
poster/views.py Normal file
View file

@ -0,0 +1 @@
# Create your views here.

View file

@ -1,4 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# Django settings for oxdata project. # Django settings for oxdata project.
import os
from os.path import join
PROJECT_PATH = os.path.dirname(__file__)
DEBUG = True DEBUG = True
TEMPLATE_DEBUG = DEBUG TEMPLATE_DEBUG = DEBUG
@ -9,8 +15,8 @@ ADMINS = (
MANAGERS = ADMINS MANAGERS = ADMINS
DATABASE_ENGINE = '' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'. DATABASE_ENGINE = 'sqlite3' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'oracle'.
DATABASE_NAME = '' # Or path to database file if using sqlite3. DATABASE_NAME = 'dev.sqlite' # Or path to database file if using sqlite3.
DATABASE_USER = '' # Not used with sqlite3. DATABASE_USER = '' # Not used with sqlite3.
DATABASE_PASSWORD = '' # Not used with sqlite3. DATABASE_PASSWORD = '' # Not used with sqlite3.
DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3. DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3.
@ -21,7 +27,7 @@ DATABASE_PORT = '' # Set to empty string for default. Not used with
# although not all choices may be available on all operating systems. # although not all choices may be available on all operating systems.
# If running in a Windows environment this must be set to the same as your # If running in a Windows environment this must be set to the same as your
# system time zone. # system time zone.
TIME_ZONE = 'America/Chicago' TIME_ZONE = 'Europe/Berlin'
# Language code for this installation. All choices can be found here: # Language code for this installation. All choices can be found here:
# http://www.i18nguy.com/unicode/language-identifiers.html # http://www.i18nguy.com/unicode/language-identifiers.html
@ -35,17 +41,20 @@ USE_I18N = True
# Absolute path to the directory that holds media. # Absolute path to the directory that holds media.
# Example: "/home/media/media.lawrence.com/" # Example: "/home/media/media.lawrence.com/"
MEDIA_ROOT = '' MEDIA_ROOT = join(PROJECT_PATH, 'media')
STATIC_ROOT = join(PROJECT_PATH, 'static')
DATA_ROOT = join(PROJECT_PATH, 'data')
# URL that handles the media served from MEDIA_ROOT. Make sure to use a # URL that handles the media served from MEDIA_ROOT. Make sure to use a
# trailing slash if there is a path component (optional in other cases). # trailing slash if there is a path component (optional in other cases).
# Examples: "http://media.lawrence.com", "http://example.com/media/" # Examples: "http://media.lawrence.com", "http://example.com/media/"
MEDIA_URL = '' MEDIA_URL = '/media/'
# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a # URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
# trailing slash. # trailing slash.
# Examples: "http://foo.com/media/", "/media/". # Examples: "http://foo.com/media/", "/media/".
ADMIN_MEDIA_PREFIX = '/media/' ADMIN_MEDIA_PREFIX = '/admin/media/'
# Make this unique, and don't share it with anybody. # Make this unique, and don't share it with anybody.
SECRET_KEY = '8n+5je$*h3d++v)o65oji)eq6ufm*z6(_i(z7gsu+eyp47d+24' SECRET_KEY = '8n+5je$*h3d++v)o65oji)eq6ufm*z6(_i(z7gsu+eyp47d+24'
@ -66,9 +75,7 @@ MIDDLEWARE_CLASSES = (
ROOT_URLCONF = 'oxdata.urls' ROOT_URLCONF = 'oxdata.urls'
TEMPLATE_DIRS = ( TEMPLATE_DIRS = (
# Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". join(PROJECT_PATH, 'templates'),
# Always use forward slashes, even on Windows.
# Don't forget to use absolute paths, not relative paths.
) )
INSTALLED_APPS = ( INSTALLED_APPS = (
@ -76,4 +83,16 @@ INSTALLED_APPS = (
'django.contrib.contenttypes', 'django.contrib.contenttypes',
'django.contrib.sessions', 'django.contrib.sessions',
'django.contrib.sites', 'django.contrib.sites',
'django.contrib.humanize',
'oxdata.criterion',
'oxdata.impawards',
'oxdata.karagarga',
'oxdata.impawards',
'oxdata.movieposterdb',
'oxdata.lookup',
'oxdata.poster',
) )
LOGIN_REDIRECT_URL='/'