refactor imdb data loading

This commit is contained in:
j 2012-09-25 13:20:48 +02:00
parent 6bf5df57bb
commit 4c4164012b
2 changed files with 64 additions and 59 deletions

View file

@ -1,12 +1,18 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import re
import unicodedata
from urllib import quote
from django.db import models
from django.conf import settings
import ox
from lookup.models import get_movie_id
from poster.models import getPosters
def normalize_value(value):
if isinstance(value, str):
value = value.decode('utf-8')
@ -93,6 +99,63 @@ class Imdb(models.Model):
self.invalid = True
self.save()
def data(self, request=None, timeout=ox.cache.cache_timeout):
    """Build the IMDb record for ``self.imdb`` and reshape it for output.

    An ``ox.web.imdb.Imdb`` object is created for ``self.imdb`` (passing
    ``timeout`` through) and then modified in place:

    * ``trivia`` / ``summary``: anchors whose href starts with ``/name/`` or
      ``/title/`` are rewritten to ``/name=<quoted link text>`` and
      ``/title=<quoted link text>``.
    * ``rating``: converted to float and multiplied by 10.
    * ``votes``: converted to float and expressed as a percentage of
      ``ox.web.imdb.max_votes()``.
    * ``reviews``: reduced to ``{'source', 'url'}`` dicts for URLs that
      contain a key of ``settings.REVIEW_WHITELIST``; the key is removed
      entirely when nothing matches.
    * ``links`` / ``posters``: filled from ``self.links()`` and
      ``self.posters(request)``.

    Returns the modified record.
    """
    info = ox.web.imdb.Imdb(self.imdb, timeout=timeout)

    # (pattern, replacement template) pairs; names are rewritten before
    # titles, and each pattern is applied to the output of the previous one.
    link_rewrites = (
        ('<a href="(/name/.*?/)">(.*?)</a>', '<a href="/name=%s">%s</a>'),
        ('<a href="(/title/.*?/)">(.*?)</a>', '<a href="/title=%s">%s</a>'),
    )

    def make_substitution(template):
        # Bind the template so each pattern gets its own replacement callback.
        def substitute(match):
            label = match.group(2)
            return template % (quote(label.encode('utf-8')), label)
        return substitute

    def fix_links(text):
        for pattern, template in link_rewrites:
            text = re.sub(pattern, make_substitution(template), text)
        return text

    if 'trivia' in info:
        info['trivia'] = [fix_links(item) for item in info['trivia']]
    if 'summary' in info:
        info['summary'] = fix_links(info['summary'])
    if 'rating' in info:
        info['rating'] = float(info['rating']) * 10
    if 'votes' in info:
        vote_ceiling = ox.web.imdb.max_votes()
        info['votes'] = 100 * float(info['votes']) / vote_ceiling
    if 'reviews' in info:
        # One entry per (review, whitelist key) pair whose key occurs in the
        # review URL; r[0] is the URL of a review entry.
        accepted = [
            {
                'source': settings.REVIEW_WHITELIST[site],
                'url': review[0]
            }
            for review in info['reviews']
            for site in settings.REVIEW_WHITELIST
            if site in review[0]
        ]
        info['reviews'] = accepted
        if not accepted:
            del info['reviews']

    info['links'] = self.links()
    info['posters'] = self.posters(request)
    return info
def links(self):
    """Return ``links()`` of the movie id looked up for ``self.imdb``.

    An empty list is returned when ``get_movie_id`` yields nothing truthy.
    """
    movie = get_movie_id(imdb_id=self.imdb)
    if not movie:
        return []
    return movie.links()
def posters(self, request=None):
    """Return the posters for ``self.imdb`` via ``getPosters``.

    ``request`` is optional. When given, the absolute URI of ``'/'`` is passed
    to ``getPosters`` as its second argument; otherwise an empty string is
    passed.
    """
    movie_id = get_movie_id(imdb_id=self.imdb)
    # NOTE(review): movie_id is passed on unchecked, while links() guards
    # against a falsy lookup result -- confirm getPosters accepts that.
    # Explicit conditional instead of the `a and b or c` idiom, which would
    # silently fall through to '' whenever the middle operand is falsy.
    base_url = request.build_absolute_uri('/') if request else ''
    return getPosters(movie_id, base_url)
def json(self):
j = {}
j['id'] = self.imdb

View file

@ -3,30 +3,14 @@
from __future__ import division
import re
from urllib import quote
from django.conf import settings
from ox.django.shortcuts import render_to_json_response, json_response
import ox.web.imdb
from ox.utils import json
from api.actions import actions
from poster.models import getPosters
from lookup.models import get_movie_id
import models
def posters(request, imdbId):
    """Return ``getPosters`` output for the movie id matching ``imdbId``.

    The absolute URI of ``'/'`` for ``request`` is passed as the second
    argument to ``getPosters``.
    """
    movie = get_movie_id(imdb_id=imdbId)
    site_root = request.build_absolute_uri('/')
    return getPosters(movie, site_root)
def links(request, imdbId):
    """Return ``links()`` of the movie id matching ``imdbId``.

    ``request`` is accepted but not used. An empty list is returned when
    ``get_movie_id`` yields nothing truthy.
    """
    movie = get_movie_id(imdb_id=imdbId)
    return movie.links() if movie else []
def getId(request):
data = json.loads(request.POST['data'])
response = json_response()
@ -44,52 +28,10 @@ def getData(request):
data = json.loads(request.POST['data'])
id = data['id']
if len(id) == 7:
data = ox.web.imdb.Imdb(id)
i, created = models.Imdb.objects.get_or_create(imdb=id)
if created:
i.update()
def fix_links(t):
def fix_names(m):
return '<a href="/name=%s">%s</a>' % (
quote(m.group(2).encode('utf-8')), m.group(2)
)
t = re.sub('<a href="(/name/.*?/)">(.*?)</a>', fix_names, t)
def fix_titles(m):
return '<a href="/title=%s">%s</a>' % (
quote(m.group(2).encode('utf-8')), m.group(2)
)
t = re.sub('<a href="(/title/.*?/)">(.*?)</a>', fix_titles, t)
return t
if 'trivia' in data:
data['trivia'] = [fix_links(t) for t in data['trivia']]
if 'summary' in data:
data['summary'] = fix_links(data['summary'])
if 'rating' in data:
data['rating'] = float(data['rating']) * 10
if 'votes' in data:
max_votes = ox.web.imdb.max_votes()
data['votes'] = 100 * float(data['votes']) / max_votes
if 'reviews' in data:
reviews = []
for r in data['reviews']:
for url in settings.REVIEW_WHITELIST:
if url in r[0]:
reviews.append({
'source': settings.REVIEW_WHITELIST[url],
'url': r[0]
})
data['reviews'] = reviews
if not data['reviews']:
del data['reviews']
data['posters'] = posters(request, id)
data['links'] = links(request, id)
data = i.data(request)
response['data'] = data
else:
response['status'] = {'text':'not found', 'code': 404}