- storeUrl

- sort / search
- show sort by field
This commit is contained in:
j 2007-04-04 14:07:16 +00:00
commit 2644f0fccf
10 changed files with 349 additions and 51 deletions

View file

@ -18,6 +18,7 @@ from scrapeit.utils import read_url
import simplejson
from oilspider import jsonLoadArchiveItem, jsonImportArchiveItem
import utils
hub = PackageHub("oilarchive")
__connection__ = hub
@ -25,11 +26,18 @@ __connection__ = hub
def queryArchive(query, orderBy="score", offset = 0, count = 100):
query = MySQLdb.escape_string(query)
orderBy = orderBy.encode('utf-8')
print orderBy
if orderBy not in ('score', 'size', 'title', 'description'):
orderBy = 'score'
if orderBy == 'size':
orderBy = "size DESC"
match = "MATCH (title, description, text) AGAINST ('%s')" % query
sql = """SELECT id, %s AS score FROM archive_item
sql = """SELECT id, %s AS score, title, size, description FROM archive_item
WHERE %s ORDER BY %s""" % \
(match, match, orderBy) #, offset, count)
result = []
print sql
matches = ArchiveItem._connection.queryAll(sql)
if len(matches) > offset:
matches = matches[offset:]
@ -57,6 +65,7 @@ class ArchiveItem(SQLObject):
modDate = DateTimeCol() #timestamp (item published)
archiveUrl = UnicodeCol() # -> url (link to archive page)
downloadUrl = UnicodeCol() # -> url (link to item)
storeUrl = UnicodeCol() # -> url (link to store)
size = IntCol() #bytes
rights = IntCol(default = 5) #-> int: 0 (free) - 5 (unfree)
itemType = UnicodeCol() #string (Text, Pictures, Music, Movies, Software)
@ -71,7 +80,11 @@ class ArchiveItem(SQLObject):
#Fulltext search
#ALTER TABLE archive_item ADD FULLTEXT (title, description, text);
def getPreview(self, sort):
    """Text shown in the preview column for this item.

    When the listing is sorted by size, show a human-readable file size;
    for every other sort order, show the formatted release date.
    """
    if sort != 'size':
        return self.relDateFormated
    return utils.formatFileSize(self.size)
def _set_author(self, value):
self._SO_set_author(value)
if not self.authorSort:
@ -80,6 +93,11 @@ class ArchiveItem(SQLObject):
def _get_year(self):
    # SQLObject-style derived property (exposed as ``self.year``):
    # the item's release year as a four-digit string taken from relDate.
    return self.relDate.strftime('%Y')
def _get_relDateFormated(self):
    """Release date formatted for display.

    Movies and books show only the year; every other item type shows
    the full ISO-style YYYY-MM-DD date.
    """
    if self.itemType not in ('Movie', 'Book'):
        return self.relDate.strftime('%Y-%m-%d')
    return self.year
#expand urls in case they are relative to the archive
def _get_archiveUrl(self):
@ -140,9 +158,9 @@ class Archive(SQLObject):
def setHashId(self):
    # Derive a stable opaque identifier for this row: the hex digest of
    # the MD5 of the numeric id rendered as a string.
    # NOTE: uses the Python 2 ``md5`` module (imported elsewhere in the file).
    self.hashId = md5.new("%s" % self.id).hexdigest()
def _get_pubDateTimestamp(self):
def _get_modDateTimestamp(self):
if self.initialized:
return int(time.mktime(self.pubDate.timetuple()))
return int(time.mktime(self.modDate.timetuple()))
return -1
def _query_url(self, query):
@ -151,7 +169,7 @@ class Archive(SQLObject):
return url
def _get_update_url(self):
return self._query_url({'modDate': self.pubDateTimestamp})
return self._query_url({'modDate': self.modDateTimestamp})
def _get_files_url(self):
    """URL that asks the remote archive for its full file listing."""
    params = {'files': '1'}
    return self._query_url(params)
@ -181,11 +199,12 @@ class Archive(SQLObject):
self.js = ''
result = simplejson.loads(read_url(self.update_url))
items = result.get('items', [])
print len(items)
print "importing", len(items), "items"
for id in items:
try:
data = read_url(self.data_url(id))
data = jsonLoadArchiveItem(data)
print data['title'].encode('utf-8')
except:
print "failed to load ", id, "from ", self.data_url(id)
continue
@ -196,7 +215,8 @@ class Archive(SQLObject):
jsonImportArchiveItem(self, id, data)
else:
q[0].update(data)
self.initialized = True
self.initialized = True
self.modDate = datetime.now()
'''
get list of all items from archive and remove those from ArchiveItem that