spider now also collects js/css files and puts them in the template
parent 2a6ec2987c
commit 036f03a265
6 changed files with 90 additions and 30 deletions

@@ -5,6 +5,7 @@
 from datetime import datetime
 import time
 from urllib import quote
+import urlparse
 import md5
 
 from turbogears.database import PackageHub

@@ -118,8 +119,8 @@ class ArchiveItem(SQLObject):
         self.updateHashID()
 
     def updateHashID(self):
-        salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title)
-        self.hashID = md5.new(salt).hexdigest()
+        salt = u'%s/%s' % (self.archive.archiveName, self.archiveItemId)
+        self.hashID = md5.new(salt.encode('utf-8')).hexdigest()
 
 
 class Archive(SQLObject):
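
Note: the old salt was hashed as-is, which breaks in Python 2 as soon as the unicode salt contains a non-ASCII character, since md5.new() only accepts byte strings. A minimal sketch of the failure mode and the fix (the salt value is made up):

    import md5

    salt = u'Caf\xe9-Archiv/42'  # hypothetical archiveName/archiveItemId salt
    # md5.new(salt) would raise UnicodeEncodeError here because of u'\xe9';
    # encoding to UTF-8 first always yields a hashable byte string.
    print md5.new(salt.encode('utf-8')).hexdigest()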

@@ -130,10 +131,17 @@ class Archive(SQLObject):
     pubDate = DateTimeCol(default=datetime.now)
     modDate = DateTimeCol(default=datetime.now)
     created = DateTimeCol(default=datetime.now)
-
-    def _get_pubDateTimestamp(self):
-        return int(time.mktime(self.pubDate.timetuple()))
+    initialized = BoolCol(default = False)
+    css = UnicodeCol(default='')
+    js = UnicodeCol(default='')
+
     hashId = UnicodeCol(alternateID = True, length=128)
+
+    def _get_pubDateTimestamp(self):
+        if self.initialized:
+            return int(time.mktime(self.pubDate.timetuple()))
+        return -1
+
     def _query_url(self, query):
         url = "%s?" % self.archiveUrl
         url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query])
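
Note: SQLObject exposes a `_get_pubDateTimestamp` method as a read-only `pubDateTimestamp` attribute, so with the new sentinel a caller sees -1 until the archive has completed its first update. The conversion itself, in isolation (the date is invented):

    import time
    from datetime import datetime

    pubDate = datetime(2007, 6, 1, 12, 0)        # hypothetical pubDate column value
    # time.mktime() interprets the struct_time in local time and returns
    # seconds since the epoch as a float; int() truncates it for the query string.
    print int(time.mktime(pubDate.timetuple()))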

@@ -142,6 +150,9 @@ class Archive(SQLObject):
     def _get_update_url(self):
         return self._query_url({'modDate': self.pubDateTimestamp})
 
+    def _get_files_url(self):
+        return self._query_url({'files': '1'})
+
     def data_url(self, id):
         return self._query_url({'id': id})
 
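
Note: the new files URL goes through the existing `_query_url` helper, which percent-encodes each value with urllib.quote and joins the pairs with '&'. Reproduced outside the class (the endpoint is a placeholder):

    from urllib import quote

    archiveUrl = 'http://example.com/export'   # hypothetical archiveUrl value
    query = {'files': '1'}
    url = "%s?" % archiveUrl
    url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query])
    print url   # -> http://example.com/export?files=1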

@@ -149,14 +160,26 @@ class Archive(SQLObject):
         if url.find('://') > 0:
             return url
         if url.startswith('/'):
-            url = "%s/%s" % (self.archiveUrl.split('/')[0], url)
+            domain = "://".join(urlparse.urlsplit(self.archiveUrl)[0:2])
+            url = "%s%s" % (domain, url)
         else:
             url = "%s/%s" % (self.archiveUrl, url)
         return url
+
     def update(self):
+        result = simplejson.loads(read_url(self.files_url))
+        if result.has_key('css'):
+            self.css = read_url(self.full_url(result['css']))
+        else:
+            self.css = ''
+        if result.has_key('js'):
+            self.js = read_url(self.full_url(result['js']))
+        else:
+            self.js = ''
         result = simplejson.loads(read_url(self.update_url))
         items = result.get('items', [])
         for id in items:
+            print "updating / adding ", id
             data = jsonLoadArchiveItem(read_url(self.data_url(id)))
             q = ArchiveItem.select(AND(
                 ArchiveItem.q.archiveItemId == id,
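
Note: the rewritten absolute-path branch fixes a real bug: splitting 'http://example.com/export' on '/' and taking element 0 yields just 'http:', so the old code built unusable URLs for root-relative css/js paths. urlparse.urlsplit() returns (scheme, netloc, path, query, fragment), and rejoining the first two parts with '://' rebuilds the site root. A quick check (URL invented):

    import urlparse

    archiveUrl = 'http://example.com/export'           # hypothetical
    print archiveUrl.split('/')[0]                     # 'http:' -- the old, broken root
    domain = "://".join(urlparse.urlsplit(archiveUrl)[0:2])
    print domain                                       # 'http://example.com'
    print "%s%s" % (domain, '/static/style.css')       # full URL for a root-relative path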

@@ -165,6 +188,7 @@ class Archive(SQLObject):
             jsonImportArchiveItem(self, id, data)
         else:
             q[0].update(data)
+        self.initialized = True
 
     '''
     get list of all items from archive and remove those from ArchiveItem that

@@ -174,9 +198,11 @@ class Archive(SQLObject):
         url = self._query_url({'modDate': -1})
         result = simplejson.loads(read_url(url))
         archiveItems = result.get('items', [])
-        archivedItems = [i.archiveItemId for i in ArchiveItem.select(ArchiveItem.q.archiveID == self.id)]
-        removeItems = filter(lambda i: i not in archiveItems, archivedItems)
-        for i in removeItems: ArchiveItem.delete(i)
+        archivedItems = {}
+        for i in ArchiveItem.select(ArchiveItem.q.archiveID == self.id):
+            archivedItems[i.archiveItemId] = i.id
+        removeItems = filter(lambda i: i not in archiveItems, archivedItems.keys())
+        for i in removeItems: ArchiveItem.delete(archivedItems[i])
 
 class SortName(SQLObject):
     name =UnicodeCol(length=1000, alternateID=True)
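
Note: the purge switches from a list of remote ids to a dict because ArchiveItem.delete() needs the local database id, while the server response only carries its own archiveItemId values. The filtering step in isolation (ids invented):

    archiveItems = ['a', 'c']                     # ids the server still reports (hypothetical)
    archivedItems = {'a': 10, 'b': 11, 'c': 12}   # remote archiveItemId -> local row id
    # filter() returns a plain list in Python 2; 'b' is gone from the server,
    # so only its local row id (11) would be passed to ArchiveItem.delete().
    removeItems = filter(lambda i: i not in archiveItems, archivedItems.keys())
    print removeItems   # -> ['b']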