- archives have hashes too

2007-04-03 16:04:32 +00:00 · 2007-04-03 16:04:32 +00:00 · 9022ed674b
commit 9022ed674b
parent 036f03a265
4 changed files with 24 additions and 7 deletions
--- a/oilarchive/controllers.py
+++ b/oilarchive/controllers.py
@ -70,6 +70,7 @@ class Admin:
      archiveUrl = data['archiveUrl'],
      ttl = int(data['ttl']),
      )
+    new.setHashId()
    raise redirect('archives')
    
  @expose('.templates.admin_sortnames')
--- a/oilarchive/cronjobs.py
+++ b/oilarchive/cronjobs.py
@ -19,8 +19,9 @@ def updateSortAuthorNames():
  grab new input from archives
 '''
 def spiderArchives():
+  # Walk the initialized archives and call update() on each one that passes
+  # the ttl test below, printing its archiveName first.
-  for archive in Archives.select():
+  for archive in Archive.select(Archive.q.initialized == True):
+    # NOTE(review): pubDate - now() is negative whenever pubDate lies in the
+    # past, so for a non-negative ttl this test is then always true; confirm
+    # whether "now() - pubDate > ttl minutes" was the intended condition.
    if archive.pubDate - datetime.now() < timedelta(minutes = archive.ttl):
+      print archive.archiveName
      archive.update()


--- a/oilarchive/model.py
+++ b/oilarchive/model.py
@ -51,7 +51,7 @@ class ArchiveItem(SQLObject):
  authorSort = UnicodeCol(default = '')
  description = UnicodeCol() # text(for rss)
  html = UnicodeCol() #(for page, contains javascript)
-  text = UnicodeCol() #Fulltext 
+  text = UnicodeCol(length = 2**25) #Fulltext 
  relDate = DateTimeCol() #timestamp (item released)
  pubDate = DateTimeCol() #timestamp (item published)
  modDate = DateTimeCol() #timestamp (item published)
@ -137,6 +137,9 @@ class Archive(SQLObject):
  
  hashId = UnicodeCol(alternateID = True, length=128)
  
+  def setHashId(self):
+    # Set hashId to the md5 hex digest of this row's id rendered with "%s".
+    # The controller hunk in this commit calls it right after creating an archive.
+    self.hashId = md5.new("%s" % self.id).hexdigest()
+    
  def _get_pubDateTimestamp(self):
    if self.initialized:
      return int(time.mktime(self.pubDate.timetuple()))
@ -168,19 +171,24 @@ class Archive(SQLObject):
    
  def update(self):
    result = simplejson.loads(read_url(self.files_url))
-    if result.has_key('css'):
+    if result and result.has_key('css'):
      self.css = read_url(self.full_url(result['css']))
    else:
      self.css = ''
-    if result.has_key('js'):
+    if result and result.has_key('js'):
      self.js = read_url(self.full_url(result['js']))
    else:
      self.js = ''
    result = simplejson.loads(read_url(self.update_url))
    items = result.get('items', [])
+    print len(items)
    for id in items:
-      print "updating / adding ", id
-      data = jsonLoadArchiveItem(read_url(self.data_url(id)))
+      try:
+        data = read_url(self.data_url(id))
+        data = jsonLoadArchiveItem(data)
+      except:
+        print "failed to load ", id, "from ", self.data_url(id)
+        continue
      q = ArchiveItem.select(AND(
            ArchiveItem.q.archiveItemId == id, 
            ArchiveItem.q.archiveID == self.id))
--- a/start-oilarchive.py
+++ b/start-oilarchive.py
@ -2,12 +2,13 @@
 import pkg_resources
 pkg_resources.require("TurboGears")

-from turbogears import update_config, start_server
+from turbogears import update_config, start_server, scheduler
 import cherrypy
 cherrypy.lowercase_api = True
 from os.path import *
 import sys

+
 # first look on the command line for a desired config file,
 # if it's not on the command line, then
 # look for setup.py in this directory. If it's not there, this script is
@ -21,5 +22,11 @@ else:
    update_config(configfile="prod.cfg",modulename="oilarchive.config")

 from oilarchive.controllers import Root
+from oilarchive import cronjobs

+# Register cronjobs.runCron with the TurboGears scheduler under the task name
+# 'cronoil' before start_server() is called. initialdelay/interval are
+# presumably in seconds (first run after 10, then every 60) -- TODO confirm
+# against the scheduler documentation. The task uses the forked process method.
+scheduler.add_interval_task(
+  action=cronjobs.runCron, taskname='cronoil', 
+  initialdelay=10, interval=60,
+  processmethod=scheduler.method.forked)
+  
 start_server(Root())