2009-07-13 08:09:58 +00:00
|
|
|
# -*- coding: UTF-8 -*-
|
|
|
|
# vi:si:et:sw=4:sts=4:ts=4
|
|
|
|
import os
|
|
|
|
|
|
|
|
from django.conf import settings
|
|
|
|
from oxlib.cache import getUrlUnicode
|
|
|
|
from oxlib import findRe
|
|
|
|
import oxlib.net
|
|
|
|
import oxweb.impawards
|
|
|
|
|
|
|
|
|
|
|
|
def getPosterFilename(id, url):
    """Return the normalized local path for the poster of `id` found at `url`.

    The file lives below settings.DATA_ROOT in
    impawards.com/<id[:1]>/<id[:4]>/<id>/ and keeps the last path
    component of `url` as its name.
    """
    # Shard the tree by the first character and the first four characters
    # of the id.
    posterDir = os.path.join(settings.DATA_ROOT, 'impawards.com', id[:1], id[:4], id)
    basename = os.path.basename(url)
    return os.path.normpath(os.path.join(posterDir, basename))
|
|
|
|
|
|
|
|
def getPoster(id, url):
    """Save the poster at `url` to its local path and return that path.

    The destination comes from getPosterFilename(id, url); the transfer
    itself is handed to oxlib.net.saveUrl.
    """
    target = getPosterFilename(id, url)
    oxlib.net.saveUrl(url, target)
    return target
|
|
|
|
|
|
|
|
def archivePosters(init=False):
    """Walk the impawards.com archive pages and save posters not yet on disk.

    latest.html is fetched first; the highest numbered archive page is read
    from it, and latest.html itself is processed as the first page. The
    numbered pages are then visited in descending order.

    init -- when false (the default), return as soon as a poster that
            already exists on disk is encountered; when true, keep going
            through every page and only skip posters that already exist.
    """
    # NOTE(review): the timeout values are handed to getUrlUnicode as-is;
    # presumably 0 bypasses oxlib's cache and -1 never expires -- confirm.
    pageHtml = getUrlUnicode('http://impawards.com/archives/latest.html', timeout=0)
    pages = int(findRe(pageHtml, '<a href = page(.*?).html>'))
    # The range starts at pages + 1: that extra first iteration stands for
    # latest.html, whose markup is already in pageHtml.
    for page in range(pages + 1, 0, -1):
        if page <= pages:
            pageHtml = getUrlUnicode('http://impawards.com/archives/page%s.html' % page, timeout=-1)
        for movieUrl in oxweb.impawards.parseArchivePage(pageHtml):
            movieHtml = getUrlUnicode(movieUrl, timeout=-1)
            data = oxweb.impawards.parseMoviePage(movieHtml)
            posterUrl = data['posterUrl']
            imdbId = data['imdbId']
            filename = getPosterFilename(imdbId, posterUrl)
            if not os.path.exists(filename):
                getPoster(imdbId, posterUrl)
            elif not init:
                # Incremental run: the first poster already on disk ends
                # the whole walk.
                return
|
|
|
|
|
|
|
|
def cron():
    """Run an incremental archive pass (archivePosters with init false)."""
    archivePosters(init=False)
|
|
|
|
|
|
|
|
def init():
    """Run a full archive pass (archivePosters with init true)."""
    archivePosters(init=True)
|
|
|
|
|