diff --git a/scrapeit/btutils.py b/scrapeit/btutils.py
index 2a76f0c..461fd8b 100644
--- a/scrapeit/btutils.py
+++ b/scrapeit/btutils.py
@@ -30,26 +30,29 @@ def torrentsWeLike(link):
             return True
     return False
 
-def movieType(movie):
-    if 'cam' in movie['title'].lower():
+def movieType(title):
+    title = title.lower()
+    if 'trailer' in title:
+        return 'trailer'
+    if 'cam' in title:
         return 'cam'
-    if 'vcd' in movie['title'].lower():
+    if 'vcd' in title:
         return 'vcd'
     for key in ('telesync', 'telecine', '.ts', '.tc', ' tc ', ' ts', 'ts-screener'):
-        if key in movie['title'].lower():
+        if key in title:
             return 'telecine'
-    for key in ('dvdrip', 'dvdscrs'):
-        if key in movie['title'].lower():
+    for key in ('dvdrip', 'dvdscrs', 'dvdscr', 'dvd rip'):
+        if key in title:
             return 'dvdrip'
-    if 'screener' in movie['title'].lower():
+    if 'screener' in title:
         return 'screener'
-    if 'xvid' in movie['title'].lower():
+    if 'xvid' in title:
         return 'Xvid'
-    if '1080p' in movie['title'].lower():
+    if '1080p' in title:
         return '1080p'
-    if '720p' in movie['title'].lower():
+    if '720p' in title:
         return '720p'
-    if 'dvdr' in movie['title'].lower():
+    if 'dvdr' in title:
         return 'DVDR'
     return ''
 
@@ -61,7 +64,7 @@ def filterMovies(movies):
             movie['imdb'] = imdb_id[0]
         else:
             movie['imdb'] = ''
-        movie['source_type'] = movieType(movie)
+        movie['source_type'] = movieType(movie['title'])
         m2.append(movie)
     return m2
 
diff --git a/scrapeit/cache.py b/scrapeit/cache.py
new file mode 100644
index 0000000..bc25248
--- /dev/null
+++ b/scrapeit/cache.py
@@ -0,0 +1,45 @@
+# -*- Mode: Python; -*-
+# -*- coding: utf-8 -*-
+# vi:si:et:sw=2:sts=2:ts=2
+
+import os
+import time
+
+import utils
+import chardet
+
+
+cache_base = "/var/cache/scrapeit/cache/"
+cache_timeout = 30*24*60*60 # 30 days
+
+def read_url(url):
+  cache_file = os.path.join(cache_base, url.replace('http://',''))
+  if cache_file.endswith('/'):
+    cache_file = "%sindex.html" % cache_file
+  if os.path.isdir(cache_file):
+    cache_file = os.path.join(cache_file, "index.html")
+  # serve from the cache only if the file exists and is younger than cache_timeout
+  if os.path.exists(cache_file):
+    now = time.mktime(time.localtime())
+    file_age = now - os.stat(cache_file).st_ctime
+    if file_age < cache_timeout:
+      f = open(cache_file)
+      data = f.read()
+      f.close()
+      return data
+  data = utils.read_url(url)
+  folder = os.path.dirname(cache_file)
+  if not os.path.exists(folder):
+    os.makedirs(folder)
+  f = open(cache_file, 'w')
+  f.write(data)
+  f.close()
+  return data
+
+def read_url_utf8(url):
+  data = read_url(url)
+  encoding = chardet.detect(data)['encoding']
+  if not encoding: encoding = 'latin-1'
+  data = unicode(data, encoding)
+  return data
+
diff --git a/scrapeit/imdb.py b/scrapeit/imdb.py
index 84a9b1a..1d16709 100644
--- a/scrapeit/imdb.py
+++ b/scrapeit/imdb.py
@@ -18,36 +18,8 @@ import utils
 import chardet
 import imdbpy_utils
 
-cache_base = "/var/cache/scrapeit/cache/"
+from cache import read_url, read_url_utf8
 
-def read_url_utf8(url):
-    data = read_url(url)
-    encoding = chardet.detect(data)['encoding']
-    if not encoding: encoding = 'latin-1'
-    data = unicode(data, encoding)
-    return data
-
-def read_url(url):
-    path = os.path.join(cache_base, url.replace('http://',''))
-    if path.endswith('/'):
-        path = "%sindex.html" % path
-    if os.path.isdir(path):
-        path = "%s/index.html" % path
-    if os.path.exists(path):
-        f = open(path)
-        data = f.read()
-        f.close()
-        return data
-    else:
-        data = utils.read_url(url)
-        folder = os.path.dirname(path)
-        if not os.path.exists(folder):
-            os.makedirs(folder)
-        f = open(path, 'w')
-        f.write(data)
-        f.close()
-        return data
-
 def _get_data(url):
     data = None
     try: