use new python-ox api

j 2012-08-15 17:15:59 +02:00
parent f6cb05b5fb
commit e267ba48df
11 changed files with 61 additions and 62 deletions
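
The change is a mechanical rename across oxdata's cover, lookup, imdb and poster apps: every call into python-ox moves from the old camelCase helpers to their snake_case replacements (readUrl → read_url, findRe → find_re, getIds → get_ids, getData → get_data, getUrl → get_url, and so on). A minimal before/after sketch of the call-site change, limited to functions that appear in the diff below and using a placeholder IMDb id:

# Sketch only, not part of the commit: old vs. new python-ox calls,
# restricted to names that actually occur in this diff.
import ox.net
import ox.web.imdb

imdb_id = '0133093'  # hypothetical example id

# old API (the removed lines below):
#   url = ox.web.imdb.getUrl(imdb_id)
#   data = ox.net.readUrl(url)
#   poster = ox.web.imdb.getMoviePoster(imdb_id)

# new API (the added lines below): same helpers, snake_case names
url = ox.web.imdb.get_url(imdb_id)
data = ox.net.read_url(url)
poster = ox.web.imdb.get_movie_poster(imdb_id)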

View file

@@ -64,7 +64,7 @@ class CoverCache(models.Model):
        url = self.url.encode('utf-8')
        name = hashlib.sha1(url).hexdigest()
        try:
-            data = ox.net.readUrl(url)
+            data = ox.net.read_url(url)
            self.image.save(name, ContentFile(data))
        except ox.net.urllib2.HTTPError, e:
            #import traceback

View file

@@ -3,8 +3,7 @@
import os

from django.conf import settings
-from ox.cache import readUrlUnicode
-from ox import findRe
+from ox import find_re
import ox.web.criterion
import ox.web.imdb
import ox.web.impawards
@@ -18,8 +17,8 @@ def addPoster(m, url, site, site_id):
    p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
    p.save()

-def getIds():
-    for id in ox.web.impawards.getIds():
+def get_ids():
+    for id in ox.web.impawards.get_ids():
        if id in (
            '2005/night',
            '2007/hands_of_the_dragon',
@@ -31,27 +30,27 @@ def getIds():
        ):
            continue
        if models.MovieId.objects.all().filter(impawards_id=id).count() == 0:
-            data = ox.web.impawards.getData(id)
+            data = ox.web.impawards.get_data(id)
            if data and 'imdbId' in data:
-                m = models.getMovieIdByImdbId(data['imdbId'])
+                m = models.get_movie_id(data['imdbId'])
                if m:
                    if not m.impawards_id:
-                        print 'impawards', ox.web.impawards.getUrl(id)
+                        print 'impawards', ox.web.impawards.get_url(id)
                        m.impawards_id = id
                        m.save()
                    for poster in data['posters']:
                        addPoster(m, poster, 'impawards.com', m.imdb_id)
                else:
-                    print 'missing impawards', ox.web.impawards.getUrl(id)
+                    print 'missing impawards', ox.web.impawards.get_url(id)

-    for id in ox.web.criterion.getIds():
+    for id in ox.web.criterion.get_ids():
        if id in ('626', '835'):
            continue
        if models.MovieId.objects.all().filter(criterion_id=id).count() == 0:
            print 'criterion', id
-            data = ox.web.criterion.getData(id, get_imdb=True)
+            data = ox.web.criterion.get_data(id, get_imdb=True)
            if data and 'imdbId' in data:
-                m = models.getMovieIdByImdbId(data['imdbId'])
+                m = models.get_movie_id(data['imdbId'])
                if not m.criterion_id:
                    m.criterion_id = id
                    m.save()
@@ -59,5 +58,5 @@ def getIds():
            else:
                print data['title'], "no imdbId"


-modules.getIds.run()
+modules.get_ids.run()

View file

@@ -18,7 +18,7 @@ class Command(BaseCommand):
    def handle(self, **options):
        import poster.models
        before_import = datetime.now()
-        lookup.cache.getIds()
+        lookup.cache.get_ids()
        for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import):
            print p.url.encode('utf-8')
            p.get()

View file

@@ -9,15 +9,15 @@ from django.db.models import Q, Max
from django.contrib.auth.models import User

import ox
-from ox.normalize import canonicalName, normalizePath, stripAccents
-from ox import stripTags
+from ox.normalize import canonical_name, normalize_path, strip_accents
+from ox import strip_tags
import ox.web.archive
import ox.web.imdb
import ox.web.wikipedia
import ox.web.allmovie


-def getMovieIdByImdbId(imdb_id):
+def get_movie_id(imdb_id):
    #movies moved in imdb
    imdb_id = {
        '0377059': '0343663',
@@ -63,8 +63,8 @@ class MovieId(models.Model):

    def updateFromWikipedia(self):
        if self.wikipedia_id:
-            wikipedia_url = ox.web.wikipedia.getUrl(self.wikipedia_id)
-            data = ox.web.wikipedia.getMovieData(wikipedia_url)
+            wikipedia_url = ox.web.wikipedia.get_url(self.wikipedia_id)
+            data = ox.web.wikipedia.get_movie_data(wikipedia_url)
            _key = {}
            for key in ('imdb_id', 'amg_id', 'archiveorg_id'):
                if key in data:
@@ -85,7 +85,7 @@ class MovieId(models.Model):
            directors = data.get('director', [])
            self.director = u', '.join(directors)
        if not self.wikipedia_id:
-            self.wikipedia_id = ox.web.wikipedia.getId(ox.web.wikipedia.getUrlByImdb(self.imdb_id))
+            self.wikipedia_id = ox.web.wikipedia.get_id(ox.web.wikipedia.get_url(imdb=self.imdb_id))
        if not self.wikipedia_id:
            self.wikipedia_id=None
        #ignore wikipedia id if already used by another movie,
@@ -108,35 +108,35 @@ class MovieId(models.Model):
            self.episode_yeaer or '')

    def suggested_name(self):
-        return normalizePath(self.title)
+        return normalize_path(self.title)

    def suggested_path(self):
        if self.series_title:
            title = self.series_title
            return os.path.join('S', 'Series', title)
        else:
-            directors = '; '.join(map(canonicalName, self.director.split(', ')))
+            directors = '; '.join(map(canonical_name, self.director.split(', ')))
            if not directors: directors = "Unknown Director"
            title = self.title
            if self.year:
                title += ' (%s)' % self.year
-            folder = stripAccents(directors[0].upper())[0]
-            return os.path.join(folder, normalizePath(directors), normalizePath(title))
+            folder = strip_accents(directors[0].upper())[0]
+            return os.path.join(folder, normalize_path(directors), normalize_path(title))

    def links(self):
        links = []
        if self.imdb_id:
            links.append({'source': 'IMDb',
-                          'url': ox.web.imdb.getUrl(self.imdb_id)})
+                          'url': ox.web.imdb.get_url(self.imdb_id)})
        if self.wikipedia_id:
            links.append({'source': 'Wikipedia',
-                          'url': ox.web.wikipedia.getUrl(self.wikipedia_id)})
+                          'url': ox.web.wikipedia.get_url(self.wikipedia_id)})
        if self.criterion_id:
            links.append({'source': 'Criterion',
-                          'url': ox.web.criterion.getUrl(self.criterion_id)})
+                          'url': ox.web.criterion.get_url(self.criterion_id)})
        if self.archiveorg_id:
            links.append({'source': 'Internet Archive',
-                          'url': ox.web.archive.getUrl(self.archiveorg_id)})
+                          'url': ox.web.archive.get_url(self.archiveorg_id)})
        qs = u'"%s (%s)"'%(self.title, self.year)
        links.append({'source': 'Google',
            'url': 'http://google.com/search?q=%s' % quote(qs.encode('utf-8'))})
@@ -155,32 +155,32 @@ class MovieId(models.Model):
                value = getattr(self, key)
                if value:
                    json['imdb.com'][{'imdb_id': 'id'}.get(key, key)] = value
-            json['imdb.com']['url'] = ox.web.imdb.getUrl(self.imdb_id)
+            json['imdb.com']['url'] = ox.web.imdb.get_url(self.imdb_id)
        if self.amg_id:
            json['allmovie.com'] = {
                'id': self.amg_id,
-                'url': ox.web.allmovie.getUrl(self.amg_id)
+                'url': ox.web.allmovie.get_url(self.amg_id)
            }
        if self.wikipedia_id:
            json['wikipedia.org'] = {
                'id': self.wikipedia_id,
-                'url': ox.web.wikipedia.getUrl(self.wikipedia_id)
+                'url': ox.web.wikipedia.get_url(self.wikipedia_id)
            }
        if self.criterion_id:
            json['criterion.com'] = {
                'id': self.criterion_id,
-                'url': ox.web.criterion.getUrl(self.criterion_id)
+                'url': ox.web.criterion.get_url(self.criterion_id)
            }
        if self.impawards_id:
            json['impawards.com'] = {
                'id': self.impawards_id,
-                'url': ox.web.impawards.getUrl(self.impawards_id)
+                'url': ox.web.impawards.get_url(self.impawards_id)
            }
        if self.archiveorg_id:
            json['archive.org'] = {
                'id': self.archiveorg_id,
-                'url': ox.web.archive.getUrl(self.archiveorg_id)
+                'url': ox.web.archive.get_url(self.archiveorg_id)
            }
        if self.episode > -1:

View file

@@ -3,13 +3,13 @@ class IdModules(object):
    def __init__(self):
        self._registry = {}

-    def register(self, name, getIds):
+    def register(self, name, get_ids):
        if name not in self._registry:
-            self._registry[name] = getIds
+            self._registry[name] = get_ids

    def run(self):
        for name in self._registry:
            self._registry[name]()


-getIds = IdModules()
+get_ids = IdModules()

View file

@@ -12,7 +12,7 @@ import poster.models

@periodic_task(run_every=timedelta(days=1))
def cronjob(**kwargs):
    before_import = datetime.now()
-    cache.getIds()
+    cache.get_ids()
    for p in poster.models.PosterCache.objects.filter(image='', failed=False, created__gt=before_import):
        p.get()

View file

@@ -20,7 +20,7 @@ def get_movie_id(request):
    movieId = request.GET['itemId']
    if movieId:
        if len(movieId) == 7:
-            movie_id = models.getMovieIdByImdbId(imdb_id=movieId)
+            movie_id = models.get_movie_id(imdb_id=movieId)
        else:
            try:
                movie_id = models.MovieId.objects.get(oxdb_id=movieId)
@@ -28,7 +28,7 @@ def get_movie_id(request):
                movie_id = None
    if 'imdb' in request.GET:
        movieId = request.GET['imdb']
-        movie_id = models.getMovieIdByImdbId(imdb_id=movieId)
+        movie_id = models.get_movie_id(imdb_id=movieId)
    elif 'oxdb' in request.GET:
        oxdbId = request.GET['oxdb']
        movie_id = models.MovieId.objects.get(oxdb_id=oxdbId)
@@ -74,7 +74,7 @@ def get(request):
    movieId = data['itemId']
    if movieId:
        if len(movieId) == 7:
-            movie_id = models.getMovieIdByImdbId(imdb_id=movieId)
+            movie_id = models.get_movie_id(imdb_id=movieId)
        else:
            try:
                movie_id = models.MovieId.objects.get(oxdb_id=movieId)
@@ -82,7 +82,7 @@ def get(request):
                movie_id = None
    if 'imdb' in data:
        movieId = data['imdb']
-        movie_id = models.getMovieIdByImdbId(imdb_id=movieId)
+        movie_id = models.get_movie_id(imdb_id=movieId)
    elif 'oxdb' in data:
        oxdbId = data['oxdb']
        movie_id = models.MovieId.objects.get(oxdb_id=oxdbId)

View file

@@ -35,7 +35,7 @@ def find(info, guess=True):
        return m
    #For now fallback to ox.web.imdb.guess and try again
    if guess:
-        id = ox.web.imdb.getMovieId(info['title'])
+        id = ox.web.imdb.get_movie_id(info['title'])
        if id:
            i, created = Imdb.objects.get_or_create(imdb=id)
            if created:
@@ -103,12 +103,12 @@ class Imdb(models.Model):
def get_new_ids(timeout=-1):
    known_ids = frozenset([i['imdb'] for i in Imdb.objects.all().values('imdb')])
-    robot = ox.cache.readUrl('http://www.imdb.com/robots.txt', timeout=timeout)
+    robot = ox.cache.read_url('http://www.imdb.com/robots.txt', timeout=timeout)
    sitemap_url = re.compile('\nSitemap: (http.+)').findall(robot)[0]
-    sitemap = ox.cache.readUrl(sitemap_url, timeout=timeout)
+    sitemap = ox.cache.read_url(sitemap_url, timeout=timeout)
    urls = re.compile('<loc>(.+?)</loc>').findall(sitemap)

    for url in sorted(urls, reverse=True):
-        s = ox.cache.readUrl(url, timeout=timeout)
+        s = ox.cache.read_url(url, timeout=timeout)
        ids = re.compile('<loc>http://www.imdb.com/title/tt(\d{7})/combined</loc>').findall(s)
        added = 0
        for i in frozenset(ids) - known_ids:

View file

@@ -12,16 +12,16 @@ from ox.utils import json
from api.actions import actions
from poster.models import getPosters
-from lookup.models import getMovieIdByImdbId
+from lookup.models import get_movie_id
import models


def posters(request, imdbId):
-    movie_id = getMovieIdByImdbId(imdb_id=imdbId)
+    movie_id = get_movie_id(imdb_id=imdbId)
    return getPosters(movie_id, request.build_absolute_uri('/'))


def links(request, imdbId):
-    movie_id = getMovieIdByImdbId(imdb_id=imdbId)
+    movie_id = get_movie_id(imdb_id=imdbId)
    links = []
    if movie_id:
        links = movie_id.links()

View file

@@ -24,7 +24,7 @@ from oxdata.lookup.models import MovieId
def getPosters(movie_id, url_prefix='', limit=lambda x, y: 0.3 < x/y < 1):
    if not movie_id:
        return {}
-    getPosterUrls(movie_id)
+    get_poster_urls(movie_id)
    posters = {}
    if url_prefix.endswith('/'): url_prefix = url_prefix[:-1]
    for p in PosterCache.objects.all().filter(movie_id=movie_id, failed=False).order_by('id'):
@@ -75,7 +75,7 @@ class PosterCache(models.Model):
        url = self.url.encode('utf-8')
        name = hashlib.sha1(url).hexdigest()
        try:
-            data = ox.net.readUrl(url)
+            data = ox.net.read_url(url)
            self.image.name = poster_path(self.url, os.path.basename(url))
            ox.makedirs(os.path.dirname(self.image.path))
            with open(self.image.path, 'w') as f:
@@ -103,7 +103,7 @@ class PosterCache(models.Model):
            self.save()
        return self.image

-def getPosterUrls(m):
+def get_poster_urls(m):
    def addPoster(url, site, site_id):
        if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0:
            p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
@@ -112,41 +112,41 @@ def getPosterUrls(m):
    if m.imdb_id:
        #if settings.DEBUG:
        #    print 'imdb'
-        poster = ox.web.imdb.getMoviePoster(m.imdb_id)
+        poster = ox.web.imdb.get_movie_poster(m.imdb_id)
        if poster:
            addPoster(poster, 'imdb.com', m.imdb_id)
        #site is sometimes down
-        #for poster in ox.web.movieposterdb.getData(m.imdb_id)['posters']:
+        #for poster in ox.web.movieposterdb.get_data(m.imdb_id)['posters']:
        #    addPoster(poster, 'movieposterdb.com', m.imdb_id)
-        poster = ox.web.piratecinema.getPosterUrl(m.imdb_id)
+        poster = ox.web.piratecinema.get_poster_url(m.imdb_id)
        if poster:
            addPoster(poster, 'piratecinema.org', m.imdb_id)

    if m.criterion_id:
        #if settings.DEBUG:
        #    print 'criterion', m.criterion_id
-        for poster in ox.web.criterion.getData(m.criterion_id)['posters']:
+        for poster in ox.web.criterion.get_data(m.criterion_id)['posters']:
            addPoster(poster, 'criterion.com', m.criterion_id)

    if m.wikipedia_id:
        #if settings.DEBUG:
        #    print 'wikipedia'
-        poster = ox.web.wikipedia.getPosterUrl(m.wikipedia_id)
+        poster = ox.web.wikipedia.get_poster_url(m.wikipedia_id)
        if poster:
            if PosterCache.objects.all().filter(url=poster).count() == 0:
                addPoster(poster, 'wikipedia.org', m.wikipedia_id)

    if m.impawards_id:
        #if settings.DEBUG:
        #    print 'impawards'
-        data = ox.web.impawards.getData(m.impawards_id)
+        data = ox.web.impawards.get_data(m.impawards_id)
        if data and 'imdbId' in data:
            for poster in data['posters']:
                addPoster(poster, 'impawards.com', m.imdb_id)

    '''
    if m.title and m.director:
-        data = ox.web.apple.getMovieData(m.title, m.director)
+        data = ox.web.apple.get_movie_data(m.title, m.director)
        if data and 'poster' in data:
            addPoster(data['poster'], 'apple.com', m.imdb_id)
    '''

View file

@@ -5,7 +5,7 @@ from datetime import timedelta

from celery.decorators import task, periodic_task

import models
-from lookup.models import getMovieIdByImdbId
+from lookup.models import get_movie_id

'''
@@ -16,11 +16,11 @@ def cronjob(**kwargs):
@task(ignore_resulsts=True, queue='default')
def getMovieposteredb(imdb_id):
-    m = getMovieIdByImdbId(imdb_id)
+    m = get_movie_id(imdb_id)

    def addPoster(url, site, site_id):
        if PosterCache.objects.all().filter(url=url, movie_id=m).count() == 0:
            p = PosterCache(url=url, site=site, site_id=site_id, movie_id=m)
            p.save()

-    for poster in ox.web.movieposterdb.getData(imdb_id)['posters']:
+    for poster in ox.web.movieposterdb.get_data(imdb_id)['posters']:
        addPoster(poster, 'movieposterdb.com', imdb_id)