Refactor cache and make it self-expire after 30 days

This commit is contained in:
j 2008-03-18 14:58:01 +00:00
parent 6bedcaa9d6
commit 36a70bb365
3 changed files with 66 additions and 41 deletions

View File

@ -30,26 +30,29 @@ def torrentsWeLike(link):
return True
return False
def movieType(title):
  """Classify a torrent release *title* into a source/quality tag.

  Checks are ordered from least to most desirable source so that the
  most specific match wins (e.g. 'dvdrip' is tested before the looser
  'dvdr'). Returns '' when no known tag is found.
  """
  title = title.lower()
  if 'trailer' in title:
    return 'trailer'
  if 'cam' in title:
    return 'cam'
  if 'vcd' in title:
    return 'vcd'
  # Telesync/telecine releases hide under several abbreviations.
  for key in ('telesync', 'telecine', '.ts', '.tc', ' tc ', ' ts', 'ts-screener'):
    if key in title:
      return 'telecine'
  for key in ('dvdrip', 'dvdscrs', 'dvdscr', 'dvd rip'):
    if key in title:
      return 'dvdrip'
  if 'screener' in title:
    return 'screener'
  if 'xvid' in title:
    return 'Xvid'
  if '1080p' in title:
    return '1080p'
  if '720p' in title:
    return '720p'
  # Must come after the 'dvdrip' loop: 'dvdrip' also contains 'dvdr'.
  if 'dvdr' in title:
    return 'DVDR'
  return ''
@ -61,7 +64,7 @@ def filterMovies(movies):
movie['imdb'] = imdb_id[0]
else:
movie['imdb'] = ''
movie['source_type'] = movieType(movie)
movie['source_type'] = movieType(movie['title'])
m2.append(movie)
return m2

50
scrapeit/cache.py Normal file
View File

@ -0,0 +1,50 @@
# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
import urllib2
from urllib import quote
import re, time
import os
import time
import utils
import chardet
import imdbpy_utils
cache_base = "/var/cache/scrapeit/cache/"
cache_timeout = 30*24*60*60 # 30 days
def read_url(url):
  """Return the body of *url*, using an on-disk cache under cache_base.

  A cached copy is reused while it is younger than cache_timeout
  (30 days); otherwise the URL is fetched via utils.read_url and the
  cache file is (re)written.
  """
  # Map the URL onto a file path inside the cache tree.
  cache_file = os.path.join(cache_base, url.replace('http://',''))
  if cache_file.endswith('/'):
    cache_file = "%sindex.html" % cache_file
  if os.path.isdir(cache_file):
    cache_file = os.path.join(cache_file, "index.html")
  # Check existence BEFORE stat(): a cold cache must not raise OSError.
  if os.path.exists(cache_file):
    ctime = os.stat(cache_file).st_ctime
    file_age = time.mktime(time.localtime()) - ctime
    if file_age < cache_timeout:
      f = open(cache_file)
      data = f.read()
      f.close()
      return data
  # Cache miss or stale entry: fetch a fresh copy and store it.
  data = utils.read_url(url)
  folder = os.path.dirname(cache_file)
  if not os.path.exists(folder):
    os.makedirs(folder)
  f = open(cache_file, 'w')
  f.write(data)
  f.close()
  return data
def read_url_utf8(url):
  """Fetch *url* through the cache and decode the bytes to unicode.

  chardet guesses the encoding; latin-1 (which can decode any byte
  string) is the fallback when detection is inconclusive.
  """
  raw = read_url(url)
  guessed = chardet.detect(raw)['encoding']
  return unicode(raw, guessed or 'latin-1')

View File

@ -18,36 +18,8 @@ import utils
import chardet
import imdbpy_utils
cache_base = "/var/cache/scrapeit/cache/"
from cache import read_url, read_url_utf8
def read_url_utf8(url):
  """Read *url* via the caching read_url and return it as unicode.

  Uses chardet to guess the charset, defaulting to latin-1 when the
  guess comes back empty.
  """
  raw = read_url(url)
  guess = chardet.detect(raw)['encoding']
  if not guess:
    guess = 'latin-1'
  return unicode(raw, guess)
def read_url(url):
  """Return the body of *url*, caching it on disk under cache_base.

  A previously cached copy is reused unconditionally (no expiry);
  otherwise the URL is fetched via utils.read_url and written to the
  cache before being returned.
  """
  # Derive the cache file path from the URL.
  path = os.path.join(cache_base, url.replace('http://',''))
  if path.endswith('/'):
    path = "%sindex.html" % path
  if os.path.isdir(path):
    path = "%s/index.html" % path
  if not os.path.exists(path):
    # First request for this URL: fetch it and populate the cache.
    data = utils.read_url(url)
    folder = os.path.dirname(path)
    if not os.path.exists(folder):
      os.makedirs(folder)
    f = open(path, 'w')
    f.write(data)
    f.close()
    return data
  # Cache hit: serve the stored copy.
  f = open(path)
  data = f.read()
  f.close()
  return data
def _get_data(url):
data = None
try: