compress cache for html,text,css,js
parent f0065dd756
commit e8aa6fe1e4
1 changed file with 69 additions and 8 deletions
ox/cache.py
@@ -2,6 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 # GPL 2008
 import gzip
+import zlib
 import hashlib
 import os
 import StringIO
@@ -19,6 +20,16 @@ from net import DEFAULT_HEADERS, getEncoding
 
 cache_timeout = 30*24*60*60 # default is 30 days
 
+COMPRESS_TYPES = (
+    'text/html',
+    'text/plain',
+    'text/xml',
+    'application/xhtml+xml',
+    'application/x-javascript',
+    'application/javascript',
+    'application/ecmascript',
+    'application/rss+xml'
+)
 
 def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
     '''
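
Note: COMPRESS_TYPES holds bare media types, so the charset parameter has to be
split off the Content-Type header before matching (as _saveUrlCache does below).
A minimal sketch of that normalization, assuming COMPRESS_TYPES from the hunk
above is in scope and using a made-up header value:

    header = 'text/html; charset=utf-8'
    content_type = header.split(';')[0].strip()
    print content_type                     # 'text/html'
    print content_type in COMPRESS_TYPES   # True: this body would be compressed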
@@ -118,7 +129,16 @@ def _connectDb():
     conn.text_factory = str
     return conn
 
-def _createDb(c):
+def _getSetting(c, key, default=None):
+    c.execute('SELECT value FROM setting WHERE key = ?', (key, ))
+    for row in c:
+        return row[0]
+    return default
+
+def _setSetting(c, key, value):
+    c.execute(u'INSERT OR REPLACE INTO setting values (?, ?)', (key, str(value)))
+
+def _createDb(conn, c):
     # Create table and indexes
     c.execute('''CREATE TABLE IF NOT EXISTS cache (url_hash varchar(42) unique, domain text, url text,
                       post_data text, headers text, created int, data blob, only_headers int)''')
@@ -126,6 +146,11 @@ def _createDb(c):
     c.execute('''CREATE INDEX IF NOT EXISTS cache_url ON cache (url)''')
     c.execute('''CREATE INDEX IF NOT EXISTS cache_url_hash ON cache (url_hash)''')
 
+    c.execute('''CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)''')
+    if int(_getSetting(c, 'version', 0)) < 1:
+        _setSetting(c, 'version', 1)
+        c.execute('''ALTER TABLE cache ADD compressed INT DEFAULT 0''')
+        conn.commit()
 
 def _readUrlCache(url, data, headers=DEFAULT_HEADERS, timeout=-1, value="data"):
     r = None
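
Note: the new setting table versions the schema, so the ALTER TABLE that adds
the compressed column runs exactly once per cache database and is skipped on
databases created after this commit. A self-contained sketch of the same
version-gated pattern, with a simplified in-memory schema:

    import sqlite3

    conn = sqlite3.connect(':memory:')
    c = conn.cursor()
    c.execute('CREATE TABLE IF NOT EXISTS cache (url_hash varchar(42) unique, data blob)')
    c.execute('CREATE TABLE IF NOT EXISTS setting (key varchar(1024) unique, value text)')

    c.execute('SELECT value FROM setting WHERE key = ?', ('version', ))
    row = c.fetchone()
    if int(row[0] if row else 0) < 1:
        # reached only on pre-migration databases; bumping the stored
        # version makes every later run skip this branch
        c.execute('ALTER TABLE cache ADD compressed INT DEFAULT 0')
        c.execute('INSERT OR REPLACE INTO setting values (?, ?)', ('version', '1'))
        conn.commit()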
@@ -139,9 +164,9 @@ def _readUrlCache(url, data, headers=DEFAULT_HEADERS, timeout=-1, value="data"):
 
     conn = _connectDb()
     c = conn.cursor()
-    _createDb(c)
+    _createDb(conn, c)
 
-    sql = 'SELECT %s FROM cache WHERE url_hash=?' % value
+    sql = 'SELECT %s, compressed FROM cache WHERE url_hash=?' % value
     if timeout > 0:
         now = time.mktime(time.localtime())
         t = (url_hash, now-timeout)
@@ -154,7 +179,10 @@ def _readUrlCache(url, data, headers=DEFAULT_HEADERS, timeout=-1, value="data"):
     for row in c:
         r = row[0]
         if value == 'data':
-            r = str(r)
+            if row[1] == 1:
+                r = zlib.decompress(r)
+            else:
+                r = str(r)
         break
 
     c.close()
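
Note: row[1] is the new compressed flag, and the blob round-trips losslessly
through zlib. A quick illustration of the symmetry and the kind of savings
repetitive markup sees:

    import zlib

    html = '<html><body>' + '<p>hello</p>' * 1000 + '</body></html>'
    blob = zlib.compress(html)
    assert zlib.decompress(blob) == html   # lossless round-trip
    print len(html), '->', len(blob)       # shrinks to a small fraction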
@@ -173,7 +201,7 @@ def _saveUrlCache(url, post_data, data, headers):
     c = conn.cursor()
 
     # Create table if not exists
-    _createDb(c)
+    _createDb(conn, c)
 
     # Insert a row of data
    if not post_data: post_data=""
@@ -182,8 +210,15 @@ def _saveUrlCache(url, post_data, data, headers):
         only_headers = 1
         data = ""
     created = time.mktime(time.localtime())
-    t = (url_hash, domain, url, post_data, json.dumps(headers), created, sqlite3.Binary(data), only_headers)
-    c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?)""", t)
+    content_type = headers.get('content-type', '').split(';')[0].strip()
+    if content_type in COMPRESS_TYPES:
+        compressed = 1
+        data = zlib.compress(data)
+    else:
+        compressed = 0
+    data = sqlite3.Binary(data)
+    t = (url_hash, domain, url, post_data, json.dumps(headers), created, data, only_headers, compressed)
+    c.execute(u"""INSERT OR REPLACE INTO cache values (?, ?, ?, ?, ?, ?, ?, ?, ?)""", t)
 
     # Save (commit) the changes and clean up
     conn.commit()
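
Note: the write path stores the (possibly compressed) body as a blob next to a
compressed flag, and the read path branches on that flag. An end-to-end sketch
of the pattern, against a hypothetical in-memory table rather than the module's
real schema:

    import sqlite3, zlib

    conn = sqlite3.connect(':memory:')
    conn.text_factory = str
    c = conn.cursor()
    c.execute('CREATE TABLE cache (url_hash text unique, data blob, compressed int)')

    body = '<html>' + 'x' * 4096 + '</html>'
    c.execute('INSERT OR REPLACE INTO cache values (?, ?, ?)',
              ('abc123', sqlite3.Binary(zlib.compress(body)), 1))

    c.execute('SELECT data, compressed FROM cache WHERE url_hash = ?', ('abc123', ))
    data, compressed = c.fetchone()
    data = zlib.decompress(str(data)) if compressed == 1 else str(data)
    assert data == body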
@@ -198,7 +233,7 @@ def migrate_to_db():
 
     conn = _connectDb()
     c = conn.cursor()
-    _createDb(c)
+    _createDb(conn, c)
 
     files = glob.glob(_getCacheBase() + "/*/*/*/*/*")
     _files = filter(lambda x: not x.endswith(".headers"), files)
@@ -222,3 +257,29 @@ def migrate_to_db():
     c.close()
     conn.close()
 
+def compress_db():
+    conn = _connectDb()
+    c = conn.cursor()
+    _createDb(conn, c)
+    c.execute(u"""SELECT url_hash FROM cache WHERE compressed = 0""")
+    ids = [row[0] for row in c]
+    for url_hash in ids:
+        c.execute(u"""SELECT headers, data FROM cache WHERE url_hash = ?""", (url_hash, ))
+        headers = {}
+        for row in c:
+            headers = json.loads(row[0])
+            data = row[1]
+
+        content_type = headers.get('content-type', '').split(';')[0].strip()
+        if content_type in COMPRESS_TYPES:
+            data = zlib.compress(data)
+            t = (sqlite3.Binary(data), url_hash)
+            print url_hash, 'update'
+            c.execute('UPDATE cache SET compressed = 1, data = ? WHERE url_hash = ?', t)
+
+    conn.commit()
+    print "optimizing database"
+    c.execute('VACUUM')
+    conn.commit()
+    c.close()
+    conn.close()
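
Note: compress_db() is a one-off maintenance entry point for existing caches:
it compresses every eligible row already on disk, then VACUUMs to reclaim the
freed pages. Nothing calls it automatically; assuming the module is importable
as ox.cache, a run would look like:

    import ox.cache
    ox.cache.compress_db()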