use new python-ox api

This commit is contained in:
j 2012-08-15 17:15:59 +02:00
parent f6cb05b5fb
commit e267ba48df
11 changed files with 61 additions and 62 deletions

View file

@ -64,7 +64,7 @@ class CoverCache(models.Model):
url = self.url.encode('utf-8') url = self.url.encode('utf-8')
name = hashlib.sha1(url).hexdigest() name = hashlib.sha1(url).hexdigest()
try: try:
data = ox.net.readUrl(url) data = ox.net.read_url(url)
self.image.save(name, ContentFile(data)) self.image.save(name, ContentFile(data))
except ox.net.urllib2.HTTPError, e: except ox.net.urllib2.HTTPError, e:
#import traceback #import traceback

View file

@ -3,8 +3,7 @@
import os import os
from django.conf import settings from django.conf import settings
from ox.cache import readUrlUnicode from ox import find_re
from ox import findRe
import ox.web.criterion import ox.web.criterion
import ox.web.imdb import ox.web.imdb
import ox.web.impawards import ox.web.impawards
@ -18,8 +17,8 @@ def addPoster(m, url, site, site_id):
p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m) p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
p.save() p.save()
def getIds(): def get_ids():
for id in ox.web.impawards.getIds(): for id in ox.web.impawards.get_ids():
if id in ( if id in (
'2005/night', '2005/night',
'2007/hands_of_the_dragon', '2007/hands_of_the_dragon',
@ -31,27 +30,27 @@ def getIds():
): ):
continue continue
if models.MovieId.objects.all().filter(impawards_id=id).count() == 0: if models.MovieId.objects.all().filter(impawards_id=id).count() == 0:
data = ox.web.impawards.getData(id) data = ox.web.impawards.get_data(id)
if data and 'imdbId' in data: if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId']) m = models.get_movie_id(data['imdbId'])
if m: if m:
if not m.impawards_id: if not m.impawards_id:
print 'impawards', ox.web.impawards.getUrl(id) print 'impawards', ox.web.impawards.get_url(id)
m.impawards_id = id m.impawards_id = id
m.save() m.save()
for poster in data['posters']: for poster in data['posters']:
addPoster(m, poster, 'impawards.com', m.imdb_id) addPoster(m, poster, 'impawards.com', m.imdb_id)
else: else:
print 'missing impawards', ox.web.impawards.getUrl(id) print 'missing impawards', ox.web.impawards.get_url(id)
for id in ox.web.criterion.getIds(): for id in ox.web.criterion.get_ids():
if id in ('626', '835'): if id in ('626', '835'):
continue continue
if models.MovieId.objects.all().filter(criterion_id=id).count() == 0: if models.MovieId.objects.all().filter(criterion_id=id).count() == 0:
print 'criterion', id print 'criterion', id
data = ox.web.criterion.getData(id, get_imdb=True) data = ox.web.criterion.get_data(id, get_imdb=True)
if data and 'imdbId' in data: if data and 'imdbId' in data:
m = models.getMovieIdByImdbId(data['imdbId']) m = models.get_movie_id(data['imdbId'])
if not m.criterion_id: if not m.criterion_id:
m.criterion_id = id m.criterion_id = id
m.save() m.save()
@ -59,5 +58,5 @@ def getIds():
else: else:
print data['title'], "no imdbId" print data['title'], "no imdbId"
modules.getIds.run() modules.get_ids.run()

View file

@ -18,7 +18,7 @@ class Command(BaseCommand):
def handle(self, **options): def handle(self, **options):
import poster.models import poster.models
before_import = datetime.now() before_import = datetime.now()
lookup.cache.getIds() lookup.cache.get_ids()
for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import): for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import):
print p.url.encode('utf-8') print p.url.encode('utf-8')
p.get() p.get()

View file

@ -9,15 +9,15 @@ from django.db.models import Q, Max
from django.contrib.auth.models import User from django.contrib.auth.models import User
import ox import ox
from ox.normalize import canonicalName, normalizePath, stripAccents from ox.normalize import canonical_name, normalize_path, strip_accents
from ox import stripTags from ox import strip_tags
import ox.web.archive import ox.web.archive
import ox.web.imdb import ox.web.imdb
import ox.web.wikipedia import ox.web.wikipedia
import ox.web.allmovie import ox.web.allmovie
def getMovieIdByImdbId(imdb_id): def get_movie_id(imdb_id):
#movies moved in imdb #movies moved in imdb
imdb_id = { imdb_id = {
'0377059': '0343663', '0377059': '0343663',
@ -63,8 +63,8 @@ class MovieId(models.Model):
def updateFromWikipedia(self): def updateFromWikipedia(self):
if self.wikipedia_id: if self.wikipedia_id:
wikipedia_url = ox.web.wikipedia.getUrl(self.wikipedia_id) wikipedia_url = ox.web.wikipedia.get_url(self.wikipedia_id)
data = ox.web.wikipedia.getMovieData(wikipedia_url) data = ox.web.wikipedia.get_movie_data(wikipedia_url)
_key = {} _key = {}
for key in ('imdb_id', 'amg_id', 'archiveorg_id'): for key in ('imdb_id', 'amg_id', 'archiveorg_id'):
if key in data: if key in data:
@ -85,7 +85,7 @@ class MovieId(models.Model):
directors = data.get('director', []) directors = data.get('director', [])
self.director = u', '.join(directors) self.director = u', '.join(directors)
if not self.wikipedia_id: if not self.wikipedia_id:
self.wikipedia_id = ox.web.wikipedia.getId(ox.web.wikipedia.getUrlByImdb(self.imdb_id)) self.wikipedia_id = ox.web.wikipedia.get_id(ox.web.wikipedia.get_url(imdb=self.imdb_id))
if not self.wikipedia_id: if not self.wikipedia_id:
self.wikipedia_id=None self.wikipedia_id=None
#ignore wikipedia id if already used by another movie, #ignore wikipedia id if already used by another movie,
@ -108,35 +108,35 @@ class MovieId(models.Model):
self.episode_yeaer or '') self.episode_yeaer or '')
def suggested_name(self): def suggested_name(self):
return normalizePath(self.title) return normalize_path(self.title)
def suggested_path(self): def suggested_path(self):
if self.series_title: if self.series_title:
title = self.series_title title = self.series_title
return os.path.join('S', 'Series', title) return os.path.join('S', 'Series', title)
else: else:
directors = '; '.join(map(canonicalName, self.director.split(', '))) directors = '; '.join(map(canonical_name, self.director.split(', ')))
if not directors: directors = "Unknown Director" if not directors: directors = "Unknown Director"
title = self.title title = self.title
if self.year: if self.year:
title += ' (%s)' % self.year title += ' (%s)' % self.year
folder = stripAccents(directors[0].upper())[0] folder = strip_accents(directors[0].upper())[0]
return os.path.join(folder, normalizePath(directors), normalizePath(title)) return os.path.join(folder, normalize_path(directors), normalize_path(title))
def links(self): def links(self):
links = [] links = []
if self.imdb_id: if self.imdb_id:
links.append({'source': 'IMDb', links.append({'source': 'IMDb',
'url': ox.web.imdb.getUrl(self.imdb_id)}) 'url': ox.web.imdb.get_url(self.imdb_id)})
if self.wikipedia_id: if self.wikipedia_id:
links.append({'source': 'Wikipedia', links.append({'source': 'Wikipedia',
'url': ox.web.wikipedia.getUrl(self.wikipedia_id)}) 'url': ox.web.wikipedia.get_url(self.wikipedia_id)})
if self.criterion_id: if self.criterion_id:
links.append({'source': 'Criterion', links.append({'source': 'Criterion',
'url': ox.web.criterion.getUrl(self.criterion_id)}) 'url': ox.web.criterion.get_url(self.criterion_id)})
if self.archiveorg_id: if self.archiveorg_id:
links.append({'source': 'Internet Archive', links.append({'source': 'Internet Archive',
'url': ox.web.archive.getUrl(self.archiveorg_id)}) 'url': ox.web.archive.get_url(self.archiveorg_id)})
qs = u'"%s (%s)"'%(self.title, self.year) qs = u'"%s (%s)"'%(self.title, self.year)
links.append({'source': 'Google', links.append({'source': 'Google',
'url': 'http://google.com/search?q=%s' % quote(qs.encode('utf-8'))}) 'url': 'http://google.com/search?q=%s' % quote(qs.encode('utf-8'))})
@ -155,32 +155,32 @@ class MovieId(models.Model):
value = getattr(self, key) value = getattr(self, key)
if value: if value:
json['imdb.com'][{'imdb_id': 'id'}.get(key, key)] = value json['imdb.com'][{'imdb_id': 'id'}.get(key, key)] = value
json['imdb.com']['url'] = ox.web.imdb.getUrl(self.imdb_id) json['imdb.com']['url'] = ox.web.imdb.get_url(self.imdb_id)
if self.amg_id: if self.amg_id:
json['allmovie.com'] = { json['allmovie.com'] = {
'id': self.amg_id, 'id': self.amg_id,
'url': ox.web.allmovie.getUrl(self.amg_id) 'url': ox.web.allmovie.get_url(self.amg_id)
} }
if self.wikipedia_id: if self.wikipedia_id:
json['wikipedia.org'] = { json['wikipedia.org'] = {
'id': self.wikipedia_id, 'id': self.wikipedia_id,
'url': ox.web.wikipedia.getUrl(self.wikipedia_id) 'url': ox.web.wikipedia.get_url(self.wikipedia_id)
} }
if self.criterion_id: if self.criterion_id:
json['criterion.com'] = { json['criterion.com'] = {
'id': self.criterion_id, 'id': self.criterion_id,
'url': ox.web.criterion.getUrl(self.criterion_id) 'url': ox.web.criterion.get_url(self.criterion_id)
} }
if self.impawards_id: if self.impawards_id:
json['impawards.com'] = { json['impawards.com'] = {
'id': self.impawards_id, 'id': self.impawards_id,
'url': ox.web.impawards.getUrl(self.impawards_id) 'url': ox.web.impawards.get_url(self.impawards_id)
} }
if self.archiveorg_id: if self.archiveorg_id:
json['archive.org'] = { json['archive.org'] = {
'id': self.archiveorg_id, 'id': self.archiveorg_id,
'url': ox.web.archive.getUrl(self.archiveorg_id) 'url': ox.web.archive.get_url(self.archiveorg_id)
} }
if self.episode > -1: if self.episode > -1:

View file

@ -3,13 +3,13 @@ class IdModules(object):
def __init__(self): def __init__(self):
self._registry = {} self._registry = {}
def register(self, name, getIds): def register(self, name, get_ids):
if name not in self._registry: if name not in self._registry:
self._registry[name] = getIds self._registry[name] = get_ids
def run(self): def run(self):
for name in self._registry: for name in self._registry:
self._registry[name]() self._registry[name]()
getIds = IdModules() get_ids = IdModules()

View file

@ -12,7 +12,7 @@ import poster.models
@periodic_task(run_every=timedelta(days=1)) @periodic_task(run_every=timedelta(days=1))
def cronjob(**kwargs): def cronjob(**kwargs):
before_import = datetime.now() before_import = datetime.now()
cache.getIds() cache.get_ids()
for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import): for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import):
p.get() p.get()

View file

@ -20,7 +20,7 @@ def get_movie_id(request):
movieId = request.GET['itemId'] movieId = request.GET['itemId']
if movieId: if movieId:
if len(movieId) == 7: if len(movieId) == 7:
movie_id = models.getMovieIdByImdbId(imdb_id=movieId) movie_id = models.get_movie_id(imdb_id=movieId)
else: else:
try: try:
movie_id = models.MovieId.objects.get(oxdb_id=movieId) movie_id = models.MovieId.objects.get(oxdb_id=movieId)
@ -28,7 +28,7 @@ def get_movie_id(request):
movie_id = None movie_id = None
if 'imdb' in request.GET: if 'imdb' in request.GET:
movieId = request.GET['imdb'] movieId = request.GET['imdb']
movie_id = models.getMovieIdByImdbId(imdb_id=movieId) movie_id = models.get_movie_id(imdb_id=movieId)
elif 'oxdb' in request.GET: elif 'oxdb' in request.GET:
oxdbId = request.GET['oxdb'] oxdbId = request.GET['oxdb']
movie_id = models.MovieId.objects.get(oxdb_id=oxdbId) movie_id = models.MovieId.objects.get(oxdb_id=oxdbId)
@ -74,7 +74,7 @@ def get(request):
movieId = data['itemId'] movieId = data['itemId']
if movieId: if movieId:
if len(movieId) == 7: if len(movieId) == 7:
movie_id = models.getMovieIdByImdbId(imdb_id=movieId) movie_id = models.get_movie_id(imdb_id=movieId)
else: else:
try: try:
movie_id = models.MovieId.objects.get(oxdb_id=movieId) movie_id = models.MovieId.objects.get(oxdb_id=movieId)
@ -82,7 +82,7 @@ def get(request):
movie_id = None movie_id = None
if 'imdb' in data: if 'imdb' in data:
movieId = data['imdb'] movieId = data['imdb']
movie_id = models.getMovieIdByImdbId(imdb_id=movieId) movie_id = models.get_movie_id(imdb_id=movieId)
elif 'oxdb' in data: elif 'oxdb' in data:
oxdbId = data['oxdb'] oxdbId = data['oxdb']
movie_id = models.MovieId.objects.get(oxdb_id=oxdbId) movie_id = models.MovieId.objects.get(oxdb_id=oxdbId)

View file

@ -35,7 +35,7 @@ def find(info, guess=True):
return m return m
#For now fallback to ox.web.imdb.guess and try again #For now fallback to ox.web.imdb.guess and try again
if guess: if guess:
id = ox.web.imdb.getMovieId(info['title']) id = ox.web.imdb.get_movie_id(info['title'])
if id: if id:
i, created = Imdb.objects.get_or_create(imdb=id) i, created = Imdb.objects.get_or_create(imdb=id)
if created: if created:
@ -103,12 +103,12 @@ class Imdb(models.Model):
def get_new_ids(timeout=-1): def get_new_ids(timeout=-1):
known_ids = frozenset([i['imdb'] for i in Imdb.objects.all().values('imdb')]) known_ids = frozenset([i['imdb'] for i in Imdb.objects.all().values('imdb')])
robot = ox.cache.readUrl('http://www.imdb.com/robots.txt', timeout=timeout) robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout)
sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0] sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
sitemap = ox.cache.readUrl(sitemap_url, timeout=timeout) sitemap = ox.cache.read_url(sitemap_url, timeout=timeout)
urls = re.compile('<loc>(.+?)</loc>').findall(sitemap) urls = re.compile('<loc>(.+?)</loc>').findall(sitemap)
for url in sorted(urls, reverse=True): for url in sorted(urls, reverse=True):
s = ox.cache.readUrl(url, timeout=timeout) s = ox.cache.read_url(url, timeout=timeout)
ids = re.compile('<loc>http://www.imdb.com/title/tt(\d{7})/combined</loc>').findall(s) ids = re.compile('<loc>http://www.imdb.com/title/tt(\d{7})/combined</loc>').findall(s)
added = 0 added = 0
for i in frozenset(ids) - known_ids: for i in frozenset(ids) - known_ids:

View file

@ -12,16 +12,16 @@ from ox.utils import json
from api.actions import actions from api.actions import actions
from poster.models import getPosters from poster.models import getPosters
from lookup.models import getMovieIdByImdbId from lookup.models import get_movie_id
import models import models
def posters(request, imdbId): def posters(request, imdbId):
movie_id = getMovieIdByImdbId(imdb_id=imdbId) movie_id = get_movie_id(imdb_id=imdbId)
return getPosters(movie_id, request.build_absolute_uri('/')) return getPosters(movie_id, request.build_absolute_uri('/'))
def links(request, imdbId): def links(request, imdbId):
movie_id = getMovieIdByImdbId(imdb_id=imdbId) movie_id = get_movie_id(imdb_id=imdbId)
links = [] links = []
if movie_id: if movie_id:
links = movie_id.links() links = movie_id.links()

View file

@ -24,7 +24,7 @@ from oxdata.lookup.models import MovieId
def getPosters(movie_id, url_prefix='', limit=lambda x, y: 0.3 < x/y < 1): def getPosters(movie_id, url_prefix='', limit=lambda x, y: 0.3 < x/y < 1):
if not movie_id: if not movie_id:
return {} return {}
getPosterUrls(movie_id) get_poster_urls(movie_id)
posters = {} posters = {}
if url_prefix.endswith('/'): url_prefix = url_prefix[:-1] if url_prefix.endswith('/'): url_prefix = url_prefix[:-1]
for p in PosterCache.objects.all().filter(movie_id=movie_id, failed=False).order_by('id'): for p in PosterCache.objects.all().filter(movie_id=movie_id, failed=False).order_by('id'):
@ -75,7 +75,7 @@ class PosterCache(models.Model):
url = self.url.encode('utf-8') url = self.url.encode('utf-8')
name = hashlib.sha1(url).hexdigest() name = hashlib.sha1(url).hexdigest()
try: try:
data = ox.net.readUrl(url) data = ox.net.read_url(url)
self.image.name = poster_path(self.url, os.path.basename(url)) self.image.name = poster_path(self.url, os.path.basename(url))
ox.makedirs(os.path.dirname(self.image.path)) ox.makedirs(os.path.dirname(self.image.path))
with open(self.image.path, 'w') as f: with open(self.image.path, 'w') as f:
@ -103,7 +103,7 @@ class PosterCache(models.Model):
self.save() self.save()
return self.image return self.image
def getPosterUrls(m): def get_poster_urls(m):
def addPoster(url, site, site_id): def addPoster(url, site, site_id):
if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0: if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0:
p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m) p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
@ -112,41 +112,41 @@ def getPosterUrls(m):
if m.imdb_id: if m.imdb_id:
#if settings.DEBUG: #if settings.DEBUG:
# print 'imdb' # print 'imdb'
poster = ox.web.imdb.getMoviePoster(m.imdb_id) poster = ox.web.imdb.get_movie_poster(m.imdb_id)
if poster: if poster:
addPoster(poster, 'imdb.com', m.imdb_id) addPoster(poster, 'imdb.com', m.imdb_id)
#site is sometimes down #site is sometimes down
#for poster in ox.web.movieposterdb.getData(m.imdb_id)['posters']: #for poster in ox.web.movieposterdb.get_data(m.imdb_id)['posters']:
# addPoster(poster, 'movieposterdb.com', m.imdb_id) # addPoster(poster, 'movieposterdb.com', m.imdb_id)
poster = ox.web.piratecinema.getPosterUrl(m.imdb_id) poster = ox.web.piratecinema.get_poster_url(m.imdb_id)
if poster: if poster:
addPoster(poster, 'piratecinema.org', m.imdb_id) addPoster(poster, 'piratecinema.org', m.imdb_id)
if m.criterion_id: if m.criterion_id:
#if settings.DEBUG: #if settings.DEBUG:
# print 'criterion', m.criterion_id # print 'criterion', m.criterion_id
for poster in ox.web.criterion.getData(m.criterion_id)['posters']: for poster in ox.web.criterion.get_data(m.criterion_id)['posters']:
addPoster(poster, 'criterion.com', m.criterion_id) addPoster(poster, 'criterion.com', m.criterion_id)
if m.wikipedia_id: if m.wikipedia_id:
#if settings.DEBUG: #if settings.DEBUG:
# print 'wikipedia' # print 'wikipedia'
poster = ox.web.wikipedia.getPosterUrl(m.wikipedia_id) poster = ox.web.wikipedia.get_poster_url(m.wikipedia_id)
if poster: if poster:
if PosterCache.objects.all().filter(url=poster).count() == 0: if PosterCache.objects.all().filter(url=poster).count() == 0:
addPoster(poster, 'wikipedia.org', m.wikipedia_id) addPoster(poster, 'wikipedia.org', m.wikipedia_id)
if m.impawards_id: if m.impawards_id:
#if settings.DEBUG: #if settings.DEBUG:
# print 'impawards' # print 'impawards'
data = ox.web.impawards.getData(m.impawards_id) data = ox.web.impawards.get_data(m.impawards_id)
if data and 'imdbId' in data: if data and 'imdbId' in data:
for poster in data['posters']: for poster in data['posters']:
addPoster(poster, 'impawards.com', m.imdb_id) addPoster(poster, 'impawards.com', m.imdb_id)
''' '''
if m.title and m.director: if m.title and m.director:
data = ox.web.apple.getMovieData(m.title, m.director) data = ox.web.apple.get_movie_data(m.title, m.director)
if data and 'poster' in data: if data and 'poster' in data:
addPoster(data['poster'], 'apple.com', m.imdb_id) addPoster(data['poster'], 'apple.com', m.imdb_id)
''' '''

View file

@ -5,7 +5,7 @@ from datetime import timedelta
from celery.decorators import task, periodic_task from celery.decorators import task, periodic_task
import models import models
from lookup.models import getMovieIdByImdbId from lookup.models import get_movie_id
''' '''
@ -16,11 +16,11 @@ def cronjob(**kwargs):
@task(ignore_resulsts=True, queue='default') @task(ignore_resulsts=True, queue='default')
def getMovieposteredb(imdb_id): def getMovieposteredb(imdb_id):
m = getMovieIdByImdbId(imdb_id) m = get_movie_id(imdb_id)
def addPoster(url, site, site_id): def addPoster(url, site, site_id):
if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0: if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0:
p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m) p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
p.save() p.save()
for poster in ox.web.movieposterdb.getData(imdb_id)['posters']: for poster in ox.web.movieposterdb.get_data(imdb_id)['posters']:
addPoster(poster, 'movieposterdb.com', imdb_id) addPoster(poster, 'movieposterdb.com', imdb_id)