- archives have hashes too
This commit is contained in:
parent
036f03a265
commit
9022ed674b
4 changed files with 24 additions and 7 deletions
|
@ -70,6 +70,7 @@ class Admin:
|
|||
archiveUrl = data['archiveUrl'],
|
||||
ttl = int(data['ttl']),
|
||||
)
|
||||
new.setHashId()
|
||||
raise redirect('archives')
|
||||
|
||||
@expose('.templates.admin_sortnames')
|
||||
|
|
|
@ -19,8 +19,9 @@ def updateSortAuthorNames():
|
|||
grab new input from archives
|
||||
'''
|
||||
# Iterate over archive rows, print the name of each one that passes the ttl
# check below, and call its update() method.
# NOTE(review): the two `for` headers below look like the before/after pair of
# a diff hunk rather than a real nested loop -- confirm which one is live.
def spiderArchives():
|
||||
for archive in Archives.select():
|
||||
# Selects only rows whose `initialized` column is true.
for archive in Archive.select(Archive.q.initialized == True):
|
||||
# NOTE(review): this subtracts now from pubDate. If pubDate lies in the past
# the difference is negative, hence always below a positive ttl delta, and the
# branch is taken on every run. Presumably the intent is
# `datetime.now() - archive.pubDate > timedelta(minutes = archive.ttl)` -- confirm.
if archive.pubDate - datetime.now() < timedelta(minutes = archive.ttl):
|
||||
print archive.archiveName
|
||||
archive.update()
|
||||
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ class ArchiveItem(SQLObject):
|
|||
authorSort = UnicodeCol(default = '')
|
||||
description = UnicodeCol() # text(for rss)
|
||||
html = UnicodeCol() #(for page, contains javascript)
|
||||
text = UnicodeCol() #Fulltext
|
||||
text = UnicodeCol(length = 2**25) #Fulltext
|
||||
relDate = DateTimeCol() #timestamp (item released)
|
||||
pubDate = DateTimeCol() #timestamp (item published)
|
||||
modDate = DateTimeCol() #timestamp (item modified)
|
||||
|
@ -137,6 +137,9 @@ class Archive(SQLObject):
|
|||
|
||||
hashId = UnicodeCol(alternateID = True, length=128)
|
||||
|
||||
# Store in hashId the hexadecimal MD5 digest of this row's id, where the id is
# first rendered to text with "%s" formatting.
def setHashId(self):
|
||||
self.hashId = md5.new("%s" % self.id).hexdigest()
|
||||
|
||||
def _get_pubDateTimestamp(self):
|
||||
if self.initialized:
|
||||
return int(time.mktime(self.pubDate.timetuple()))
|
||||
|
@ -168,19 +171,24 @@ class Archive(SQLObject):
|
|||
|
||||
def update(self):
|
||||
result = simplejson.loads(read_url(self.files_url))
|
||||
if result.has_key('css'):
|
||||
if result and result.has_key('css'):
|
||||
self.css = read_url(self.full_url(result['css']))
|
||||
else:
|
||||
self.css = ''
|
||||
if result.has_key('js'):
|
||||
if result and result.has_key('js'):
|
||||
self.js = read_url(self.full_url(result['js']))
|
||||
else:
|
||||
self.js = ''
|
||||
result = simplejson.loads(read_url(self.update_url))
|
||||
items = result.get('items', [])
|
||||
print len(items)
|
||||
for id in items:
|
||||
print "updating / adding ", id
|
||||
data = jsonLoadArchiveItem(read_url(self.data_url(id)))
|
||||
try:
|
||||
data = read_url(self.data_url(id))
|
||||
data = jsonLoadArchiveItem(data)
|
||||
except:
|
||||
print "failed to load ", id, "from ", self.data_url(id)
|
||||
continue
|
||||
q = ArchiveItem.select(AND(
|
||||
ArchiveItem.q.archiveItemId == id,
|
||||
ArchiveItem.q.archiveID == self.id))
|
||||
|
|
|
@ -2,12 +2,13 @@
|
|||
import pkg_resources
|
||||
pkg_resources.require("TurboGears")
|
||||
|
||||
from turbogears import update_config, start_server
|
||||
from turbogears import update_config, start_server, scheduler
|
||||
import cherrypy
|
||||
cherrypy.lowercase_api = True
|
||||
from os.path import *
|
||||
import sys
|
||||
|
||||
|
||||
# first look on the command line for a desired config file,
|
||||
# if it's not on the command line, then
|
||||
# look for setup.py in this directory. If it's not there, this script is
|
||||
|
@ -21,5 +22,11 @@ else:
|
|||
update_config(configfile="prod.cfg",modulename="oilarchive.config")
|
||||
|
||||
from oilarchive.controllers import Root
|
||||
from oilarchive import cronjobs
|
||||
|
||||
scheduler.add_interval_task(
|
||||
action=cronjobs.runCron, taskname='cronoil',
|
||||
initialdelay=10, interval=60,
|
||||
processmethod=scheduler.method.forked)
|
||||
|
||||
start_server(Root())
|
||||
|
|
Loading…
Reference in a new issue