- archives have hashes too

j 2007-04-03 16:04:32 +00:00
parent 036f03a265
commit 9022ed674b
4 changed files with 24 additions and 7 deletions

@@ -70,6 +70,7 @@ class Admin:
             archiveUrl = data['archiveUrl'],
             ttl = int(data['ttl']),
         )
+        new.setHashId()
         raise redirect('archives')
     @expose('.templates.admin_sortnames')
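
The placement of the added call matters: SQLObject assigns the integer primary key only when the INSERT runs, and setHashId() derives the hash from that id, so it cannot be passed as a constructor argument. A minimal sketch of the create-then-hash order (the field values here are illustrative, not from the commit):

    new = Archive(
        archiveUrl = u'http://example.com/feed',  # illustrative values only
        ttl = 15,
    )
    new.setHashId()   # new.id exists only after the INSERT above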

@@ -19,8 +19,9 @@ def updateSortAuthorNames():
 grab new input from archives
 '''
 def spiderArchives():
-    for archive in Archives.select():
+    for archive in Archive.select(Archive.q.initialized == True):
+        if archive.pubDate - datetime.now() < timedelta(minutes = archive.ttl):
             print archive.archiveName
             archive.update()
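
One caveat with the new guard: for any archive whose pubDate lies in the past, archive.pubDate - datetime.now() is a negative timedelta and so is always smaller than the TTL, making the condition effectively always true. If the intent is "re-spider only once the TTL has elapsed", the age test would read the other way around (an assumption about intent, not what the commit ships):

    from datetime import datetime, timedelta

    def isStale(archive):
        # True once more than archive.ttl minutes have passed since pubDate
        return datetime.now() - archive.pubDate > timedelta(minutes = archive.ttl)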

@@ -51,7 +51,7 @@ class ArchiveItem(SQLObject):
     authorSort = UnicodeCol(default = '')
     description = UnicodeCol() # text (for rss)
     html = UnicodeCol() # (for page, contains javascript)
-    text = UnicodeCol() # fulltext
+    text = UnicodeCol(length = 2**25) # fulltext
     relDate = DateTimeCol() # timestamp (item released)
     pubDate = DateTimeCol() # timestamp (item published)
     modDate = DateTimeCol() # timestamp (item modified)
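
For scale, length = 2**25 sizes the fulltext column at 33,554,432 characters, i.e. 32 MiB of text, presumably to force a column type large enough for whole documents:

    print 2 ** 25    # 33554432 == 32 * 1024 * 1024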
@@ -137,6 +137,9 @@ class Archive(SQLObject):
+    hashId = UnicodeCol(alternateID = True, length=128)
+    def setHashId(self):
+        self.hashId = md5.new("%s" % self.id).hexdigest()
     def _get_pubDateTimestamp(self):
         if self.initialized:
             return int(time.mktime(self.pubDate.timetuple()))
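
Note the headroom in the column: md5.new(...).hexdigest() always returns 32 hex characters, while hashId is declared with length=128, leaving room for a longer digest later. A quick check (the md5 module matches what the model uses; hashlib.md5 is the modern spelling):

    import md5
    digest = md5.new("%s" % 42).hexdigest()
    print digest, len(digest)    # the 32-character hex digest, then 32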
@@ -168,19 +171,24 @@ class Archive(SQLObject):
     def update(self):
         result = simplejson.loads(read_url(self.files_url))
-        if result.has_key('css'):
+        if result and result.has_key('css'):
             self.css = read_url(self.full_url(result['css']))
         else:
             self.css = ''
-        if result.has_key('js'):
+        if result and result.has_key('js'):
             self.js = read_url(self.full_url(result['js']))
         else:
             self.js = ''
         result = simplejson.loads(read_url(self.update_url))
         items = result.get('items', [])
         print len(items)
         for id in items:
             print "updating / adding ", id
-            data = jsonLoadArchiveItem(read_url(self.data_url(id)))
+            try:
+                data = read_url(self.data_url(id))
+                data = jsonLoadArchiveItem(data)
+            except:
+                print "failed to load ", id, "from ", self.data_url(id)
+                continue
             q = ArchiveItem.select(AND(
                 ArchiveItem.q.archiveItemId == id,
                 ArchiveItem.q.archiveID == self.id))
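
The new try/except stops one bad item from aborting the whole update loop, though a bare except: also swallows KeyboardInterrupt and genuine bugs. A standalone sketch of the same guard with narrower exceptions (urllib2 stands in for the module's read_url helper, which this commit doesn't show):

    import urllib2, simplejson

    def fetch_item(url):
        '''Fetch and decode one archive item; None means "skip this id".'''
        try:
            return simplejson.loads(urllib2.urlopen(url).read())
        except (IOError, ValueError):   # network failure or malformed JSON
            print "failed to load", url
            return None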

@@ -2,12 +2,13 @@
 import pkg_resources
 pkg_resources.require("TurboGears")
-from turbogears import update_config, start_server
+from turbogears import update_config, start_server, scheduler
 import cherrypy
 cherrypy.lowercase_api = True
 from os.path import *
 import sys
 # first look on the command line for a desired config file,
 # if it's not on the command line, then
 # look for setup.py in this directory. If it's not there, this script is
@@ -21,5 +22,11 @@ else:
     update_config(configfile="prod.cfg",modulename="oilarchive.config")
 from oilarchive.controllers import Root
+from oilarchive import cronjobs
+scheduler.add_interval_task(
+    action=cronjobs.runCron, taskname='cronoil',
+    initialdelay=10, interval=60,
+    processmethod=scheduler.method.forked)
 start_server(Root())
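
The scheduler block registers cronjobs.runCron to fire 10 seconds after startup and every 60 seconds thereafter; processmethod=scheduler.method.forked runs each invocation in a child process, so a slow spider pass can't block the CherryPy request threads. runCron itself isn't part of this commit; presumably it just chains the two jobs the cronjobs module defines (a sketch of that assumption, not the committed code):

    # oilarchive/cronjobs.py -- assumed entry point
    def runCron():
        updateSortAuthorNames()   # both defined in cronjobs per the diff above
        spiderArchives()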