oxdata/impawards/cache.py
2009-07-13 10:09:58 +02:00

53 lines
1.6 KiB
Python

# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import os
from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.impawards
def getPosterFilename(id, url):
    """Return the normalized local cache path for a poster.

    The poster is placed under ``settings.DATA_ROOT/impawards.com`` in a
    directory sharded by the first character of ``id``, then the first four
    characters of ``id``, then the full ``id``. The file name is the last
    path component of ``url``.
    """
    poster_dir = os.path.join(settings.DATA_ROOT, 'impawards.com', id[:1], id[:4], id)
    poster_name = os.path.basename(url)
    return os.path.normpath(os.path.join(poster_dir, poster_name))
def getPoster(id, url):
    """Download the poster at ``url`` into the local cache if it is missing.

    The target path comes from ``getPosterFilename(id, url)``. If that file
    already exists nothing is fetched; otherwise the containing directory is
    created as needed and the data returned by ``oxlib.net.getUrl(url)`` is
    written to it.

    Returns the local file name in both cases.
    """
    filename = getPosterFilename(id, url)
    if not os.path.exists(filename):
        # Derive the directory from the target path; ``dirname`` was
        # previously referenced here without ever being defined.
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        data = oxlib.net.getUrl(url)
        # Binary mode writes the downloaded bytes unmodified (text mode may
        # translate newlines on some platforms); ``with`` closes the file
        # even if the write fails.
        with open(filename, 'wb') as f:
            f.write(data)
    return filename
def archivePosters(init=False):
    """Crawl the impawards.com archive and cache every poster found.

    ``latest.html`` is processed first; the highest numbered archive page is
    scraped from it, and the numbered pages are then visited in descending
    order. Each movie page listed on an archive page is fetched and parsed,
    and its poster is downloaded via ``getPoster`` when it is not yet on disk.

    init -- when false (the default), the crawl returns as soon as it meets
            a poster that is already cached; when true, cached posters are
            skipped and the crawl continues through every page.
    """
    # NOTE(review): timeout=0 here vs. timeout=-1 below presumably selects
    # oxlib's cache freshness policy -- confirm against oxlib.cache.
    archive_html = getUrlUnicode('http://impawards.com/archives/latest.html', timeout=0)
    pages = int(findRe(archive_html, '<a href = page(.*?).html>'))
    # The first iteration (page == pages + 1) stands for latest.html itself,
    # which is already loaded; the remaining iterations fetch numbered pages.
    for page in range(pages + 1, 0, -1):
        if page <= pages:
            archive_html = getUrlUnicode('http://impawards.com/archives/page%s.html' % page, timeout=-1)
        for movie_url in oxweb.impawards.parseArchivePage(archive_html):
            movie_html = getUrlUnicode(movie_url, timeout=-1)
            data = oxweb.impawards.parseMoviePage(movie_html)
            poster_url = data['posterUrl']
            imdb_id = data['imdbId']
            if not os.path.exists(getPosterFilename(imdb_id, poster_url)):
                getPoster(imdb_id, poster_url)
            elif not init:
                # Incremental run: reaching a cached poster ends the crawl.
                return
def cron():
    """Run an incremental crawl that stops at the first cached poster."""
    archivePosters(init=False)
def init():
    """Run a full crawl of the archive, skipping posters already cached."""
    archivePosters(init=True)