spider now also collects js/css files and puts them in the template

This commit is contained in:
j 2007-04-03 13:18:22 +00:00
commit 036f03a265
6 changed files with 90 additions and 30 deletions

View file

@ -5,6 +5,7 @@
from datetime import datetime
import time
from urllib import quote
import urlparse
import md5
from turbogears.database import PackageHub
@ -118,8 +119,8 @@ class ArchiveItem(SQLObject):
self.updateHashID()
# Recompute self.hashID as the hex MD5 digest of a salt string that starts
# with the owning archive's name.
# Read top to bottom, the second salt/hashID pair is the one that takes
# effect: u'<archiveName>/<archiveItemId>', encoded to UTF-8 before hashing.
# NOTE(review): this text is a commit-diff rendering without +/- markers; the
# first salt/hashID pair (author/title based) is presumably the removed
# version and the second pair its replacement -- confirm against the commit.
def updateHashID(self):
salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title)
self.hashID = md5.new(salt).hexdigest()
salt = u'%s/%s' % (self.archive.archiveName, self.archiveItemId)
self.hashID = md5.new(salt.encode('utf-8')).hexdigest()
class Archive(SQLObject):
@ -130,10 +131,17 @@ class Archive(SQLObject):
pubDate = DateTimeCol(default=datetime.now)
modDate = DateTimeCol(default=datetime.now)
created = DateTimeCol(default=datetime.now)
# Return self.pubDate as an integer timestamp: time.mktime() of its
# timetuple, truncated with int().
# NOTE(review): a second _get_pubDateTimestamp follows a few lines below; in
# this diff rendering this one is presumably the removed version -- confirm.
def _get_pubDateTimestamp(self):
return int(time.mktime(self.pubDate.timetuple()))
initialized = BoolCol(default = False)
css = UnicodeCol(default='')
js = UnicodeCol(default='')
hashId = UnicodeCol(alternateID = True, length=128)
# Return self.pubDate as an integer timestamp (time.mktime() of its
# timetuple, truncated with int()) when self.initialized is true;
# otherwise return -1.
# The result is read as self.pubDateTimestamp by _get_update_url, which
# sends it as the 'modDate' query parameter.
def _get_pubDateTimestamp(self):
if self.initialized:
return int(time.mktime(self.pubDate.timetuple()))
return -1
def _query_url(self, query):
url = "%s?" % self.archiveUrl
url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query])
@ -142,6 +150,9 @@ class Archive(SQLObject):
# Build the URL that update() fetches (as self.update_url) to get the list of
# items: the result of _query_url with a single 'modDate' parameter set to
# self.pubDateTimestamp.
def _get_update_url(self):
return self._query_url({'modDate': self.pubDateTimestamp})
# Build the URL that update() fetches (as self.files_url) to look up the
# archive's optional 'css' and 'js' entries: the result of _query_url with
# the single parameter files=1.
def _get_files_url(self):
return self._query_url({'files': '1'})
# Build the URL for one item's data: the result of _query_url with the
# single parameter 'id' set to the given id. update() reads this URL for
# every id in the fetched item list.
def data_url(self, id):
return self._query_url({'id': id})
@ -149,14 +160,26 @@ class Archive(SQLObject):
if url.find('://') > 0:
return url
if url.startswith('/'):
url = "%s/%s" % (self.archiveUrl.split('/')[0], url)
domain = "://".join(urlparse.urlsplit(self.archiveUrl)[0:2])
url = "%s%s" % (domain, url)
else:
url = "%s/%s" % (self.archiveUrl, url)
return url
def update(self):
result = simplejson.loads(read_url(self.files_url))
if result.has_key('css'):
self.css = read_url(self.full_url(result['css']))
else:
self.css = ''
if result.has_key('js'):
self.js = read_url(self.full_url(result['js']))
else:
self.js = ''
result = simplejson.loads(read_url(self.update_url))
items = result.get('items', [])
for id in items:
print "updating / adding ", id
data = jsonLoadArchiveItem(read_url(self.data_url(id)))
q = ArchiveItem.select(AND(
ArchiveItem.q.archiveItemId == id,
@ -165,6 +188,7 @@ class Archive(SQLObject):
jsonImportArchiveItem(self, id, data)
else:
q[0].update(data)
self.initialized = True
'''
get list of all items from archive and remove those from ArchiveItem that
@ -174,9 +198,11 @@ class Archive(SQLObject):
url = self._query_url({'modDate': -1})
result = simplejson.loads(read_url(url))
archiveItems = result.get('items', [])
archivedItems = [i.archiveItemId for i in ArchiveItem.select(ArchiveItem.q.archiveID == self.id)]
removeItems = filter(lambda i: i not in archiveItems, archivedItems)
for i in removeItems: ArchiveItem.delete(i)
archivedItems = {}
for i in ArchiveItem.select(ArchiveItem.q.archiveID == self.id):
archivedItems[i.archiveItemId] = i.id
removeItems = filter(lambda i: i not in archiveItems, archivedItems.keys())
for i in removeItems: ArchiveItem.delete(archivedItems[i])
class SortName(SQLObject):
name =UnicodeCol(length=1000, alternateID=True)