- spider can read archives now
- items are indexed and queryArchive sort of works; items get a score element
- port some sort / session things from oxdb
- transparent png reflections
parent d4c2fe794f
commit 0d3592374d
8 changed files with 408 additions and 65 deletions
@@ -3,61 +3,112 @@
 # vi:si:et:sw=2:sts=2:ts=2

 from datetime import datetime
 import time
+from urllib import quote
+import md5

 from turbogears.database import PackageHub
 from sqlobject import *
 from turbogears import identity
-from scrapeit import read_url
+from turbojson.jsonify import jsonify_sqlobject
+import MySQLdb
+from sqlobject import *
+
+from scrapeit.utils import read_url
 import simplejson

-from oilspider import jsonLoadArchiveItem, jsonPrepareArchiveItem, jsonImportArchiveItem
+from oilspider import jsonLoadArchiveItem, jsonImportArchiveItem

 hub = PackageHub("oilarchive")
 __connection__ = hub


+def queryArchive(query, orderBy="score", offset = 0, count = 100):
+  query = MySQLdb.escape_string(query)
+  match = "MATCH (title, description, text) AGAINST ('%s')" % query
+  sql = """SELECT id, %s AS score FROM archive_item
+    WHERE %s ORDER BY %s""" % \
+    (match, match, orderBy) #, offset, count)
+  result = []
+  matches = ArchiveItem._connection.queryAll(sql)
+  if len(matches) > offset:
+    matches = matches[offset:]
+  if len(matches) > count:
+    matches = matches[:count]
+  for m in matches:
+    item = ArchiveItem.get(m[0])
+    item.score = m[1]
+    result.append(item)
+  return result
+

 class ArchiveItem(SQLObject):
-  archiveId = UnicodeCol()
+  hashId = UnicodeCol(alternateID = True, length=128)
+  archiveItemId = UnicodeCol()
+  icon = UnicodeCol() # -> url (128x128)
   title = UnicodeCol()
-  description = UnicodeCol()
+  titleSort = UnicodeCol(default = '')
   author = UnicodeCol()
+  authorSort = UnicodeCol(default = '')
+  description = UnicodeCol() # text(for rss)
   html = UnicodeCol() #(for page, contains javascript)
   text = UnicodeCol() #Fulltext
-  url = UnicodeCol()
-  downloadURL = UnicodeCol()
-  icon = UnicodeCol()
-  releaseDate = DateTimeCol()
-  pubDate = DateTimeCol()
-  size = IntCol()
-  rights = IntCol() #-> int: 0 (free) - 5 (unfree)
-  archiveName = UnicodeCol()
-  archiveType = UnicodeCol()
+  relDate = DateTimeCol() #timestamp (item released)
+  pubDate = DateTimeCol() #timestamp (item published)
+  modDate = DateTimeCol() #timestamp (item modified)
+  archiveUrl = UnicodeCol() # -> url (link to archive page)
+  downloadUrl = UnicodeCol() # -> url (link to item)
+  size = IntCol() #bytes
+  rights = IntCol(default = 5) #-> int: 0 (free) - 5 (unfree)
+  itemType = UnicodeCol() #string (Text, Pictures, Music, Movies, Software)
+  genre = UnicodeCol(default = '')
+
+  archive = ForeignKey('Archive')
   created = DateTimeCol(default=datetime.now)

+  #score is only available if loaded via queryArchive
+  score = -1
+
+  #Fulltext search
+  #ALTER TABLE archive_item ADD FULLTEXT (title, description, text);
+
   def _set_author(self, value):
     self._SO_set_author(value)
-    if not self.author_sort:
-      self.author_sort = value
+    if not self.authorSort:
+      self.authorSort = value

   def _get_year(self):
-    return self.releaseDate.strftime('%Y')
+    return self.relDate.strftime('%Y')

   def _get_json(self):
+    result = jsonify_sqlobject(self)
+    result['relDate'] = self.relDate.strftime('%s')
+    result['pubDate'] = self.pubDate.strftime('%s')
+    return result
+    '''
     return dict(
       title = self.title,
       description = self.description,
       html = self.html,
       text = self.text,
       author = self.author,
       url = self.url,
-      icon = '/view/%s/icon' % self.hash,
-      releaseDate = self.releaseDate,
-      pubDate = self.pubDate,
-      archiveUrl = self.archiveUrl,
-      downloadUrl = self.downloadUrl,
-      size = self.size,
+      icon = '/view/%s/icon.png' % self.hash,
+      relDate = self.relDate.strftime('%s'),
+      pubDate = self.pubDate.strftime('%s'),
+      size = self.size,
     )
+    '''

   def update(self, data):
     for key in data:
-      setattr(self, key, values[key])
+      setattr(self, key, data[key])
     self.updateHashID()

+  def updateHashID(self):
+    salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title)
+    self.hashId = md5.new(salt).hexdigest()
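The new queryArchive path depends on the MySQL fulltext index named in the model comment (`ALTER TABLE archive_item ADD FULLTEXT (title, description, text);`). A minimal sketch of how a caller might consume the scored results; the `search` helper and its pagination parameters are hypothetical, not part of this commit:

```python
# Hypothetical caller of queryArchive (assumes the model module above is
# importable); illustrates the transient .score attribute only.
def search(query, page=0, page_size=20):
  # queryArchive escapes the query string, runs MATCH ... AGAINST over the
  # fulltext index, slices offset/count in Python, and returns ArchiveItem
  # instances with .score attached (score stays -1 on items loaded any
  # other way).
  items = queryArchive(query, orderBy="score",
                       offset=page * page_size, count=page_size)
  return [(item.title, item.score) for item in items]
```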
@@ -66,27 +117,33 @@ class Archive(SQLObject):
   archiveType = UnicodeCol(default=u'')
   ttl = IntCol(default = "15")
   pubDate = DateTimeCol(default=datetime.now)
   modDate = DateTimeCol(default=datetime.now)
   created = DateTimeCol(default=datetime.now)

   def _get_pubDateTimestamp(self):
-    return time.mktime(self.pubDate.timetuple())
+    return int(time.mktime(self.pubDate.timetuple()))

+  def _query_url(self, query):
+    url = "%s?" % self.archiveUrl
+    url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query])
+    return url
+
   def _get_update_url(self):
-    return "%s?pubDate=%s" % (self.archiveUrl, self.pubDateTimestamp)
+    return self._query_url({'modDate': self.pubDateTimestamp})

   def data_url(self, id):
-    return "%s?id=%s" % (self.archiveUrl, id)
+    return self._query_url({'id': id})

   def update(self):
     result = simplejson.loads(read_url(self.update_url))
-    for id in result:
+    items = result.get('items', [])
+    for id in items:
       data = jsonLoadArchiveItem(read_url(self.data_url(id)))
       q = ArchiveItem.select(AND(
-        ArchiveItem.q.ArchiveId == id,
-        ArchiveItem.q.ArchiveName == self.ArchiveName))
+        ArchiveItem.q.archiveItemId == id,
+        ArchiveItem.q.archiveID == self.id))
       if q.count() == 0:
-        data = jsonPrepareArchiveItem(id, data)
-        jsonImportArchiveItem(data)
+        jsonImportArchiveItem(self, id, data)
       else:
         q[0].update(data)
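Archive.update now drives a small JSON pull protocol: a GET of update_url (built by `_query_url` as `archiveUrl?modDate=<timestamp>`) is expected to return a dict with an `items` list of ids, and each `data_url(id)` returns one serialized item. A sketch of the URL building under that reading; the standalone helper, URLs, and values below are made-up examples, not part of this commit:

```python
# Mirrors Archive._query_url as a standalone function (Python 2, like the
# model code above); example URLs and values are illustrative only.
import urllib

def query_url(archive_url, query):
  # key=value pairs joined with '&', values URL-quoted
  return "%s?" % archive_url + "&".join(
      ["%s=%s" % (key, urllib.quote("%s" % query[key])) for key in query])

print query_url('http://example.com/api', {'modDate': 1199145600})
# -> http://example.com/api?modDate=1199145600
print query_url('http://example.com/api', {'id': 'item 42'})
# -> http://example.com/api?id=item%2042
```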