# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
"""Archive movie posters listed on impawards.com under settings.DATA_ROOT."""
import os

from django.conf import settings
from oxlib.cache import getUrlUnicode
from oxlib import findRe
import oxlib.net
import oxweb.impawards


def getPosterFilename(id, url):
    """Return the local path at which the poster for ``id`` is stored.

    The path is
    ``<DATA_ROOT>/impawards.com/<id[:1]>/<id[:4]>/<id>/<basename of url>``,
    normalised with ``os.path.normpath``.

    ``id`` is used as a string (it is sliced and joined into the path);
    ``url`` is the poster URL whose last path component becomes the file name.
    """
    # Shard by the first character and the first four characters of the id.
    dirname = os.path.join(settings.DATA_ROOT, 'impawards.com',
                           id[:1], id[:4], id)
    basename = os.path.split(url)[1]
    return os.path.normpath(os.path.join(dirname, basename))


def getPoster(id, url):
    """Return the local file name for the poster of ``id``.

    NOTE(review): this only computes the path; nothing is downloaded or
    written here. archivePosters() calls it when the file does not exist,
    so the file is still missing afterwards -- confirm whether a fetch step
    was intended.
    """
    return getPosterFilename(id, url)


def archivePosters(init=False):
    """Walk the impawards.com archive pages and process every listed poster.

    The archive is traversed from the newest page (``latest.html``) down to
    ``page1.html``. For each movie page the poster URL and IMDb id are
    parsed, and getPoster() is called if the poster file is not on disk.

    If ``init`` is false, the walk stops at the first poster whose file
    already exists; if ``init`` is true, the whole archive is traversed.
    """
    # NOTE(review): the timeout values are passed straight to oxlib.cache;
    # presumably 0 bypasses the cache and -1 never expires -- confirm.
    archive_html = getUrlUnicode('http://impawards.com/archives/latest.html',
                                 timeout=0)
    # NOTE(review): the pattern is an empty string in this copy of the file,
    # which looks like markup lost in extraction; with an empty pattern the
    # int() conversion is unlikely to succeed. Restore the original regex
    # that captures the page count from latest.html.
    pages = int(findRe(archive_html, ''))
    for page in range(pages + 1, 0, -1):
        # The first iteration (page == pages + 1) stands for latest.html,
        # which is already loaded; numbered pages are fetched on demand.
        if page <= pages:
            archive_html = getUrlUnicode(
                'http://impawards.com/archives/page%s.html' % page,
                timeout=-1)
        for movie_url in oxweb.impawards.parseArchivePage(archive_html):
            movie_html = getUrlUnicode(movie_url, timeout=-1)
            data = oxweb.impawards.parseMoviePage(movie_html)
            poster_url = data['posterUrl']
            imdb_id = data['imdbId']
            filename = getPosterFilename(imdb_id, poster_url)
            if not os.path.exists(filename):
                getPoster(imdb_id, poster_url)
            elif not init:
                # Incremental run: an existing poster ends the walk.
                return


def cron():
    """Incremental run: stop at the first poster that already exists."""
    archivePosters()


def init():
    """Full run: traverse the entire archive regardless of existing files."""
    archivePosters(True)