oilarchive/oilarchive/model.py
j 0d3592374d - spider can read archives now
- items are indexed and queryArchive sort of works
  items get a score element
- port some sort / session things from oxdb
- transparent png reflections
2007-03-28 21:26:58 +00:00

250 lines
7.7 KiB
Python

# -*- Mode: Python; -*-
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
import time
from urllib import quote
import md5
from turbogears.database import PackageHub
from turbogears import identity
from turbojson.jsonify import jsonify_sqlobject
import MySQLdb
from sqlobject import *
from scrapeit.utils import read_url
import simplejson
from oilspider import jsonLoadArchiveItem, jsonImportArchiveItem
# Database hub for the "oilarchive" package, obtained from TurboGears'
# PackageHub.
hub = PackageHub("oilarchive")
# Module-level connection hook. NOTE(review): presumably this is the name
# SQLObject looks up to give the model classes below their default
# connection -- confirm against the SQLObject documentation.
__connection__ = hub
def queryArchive(query, orderBy="score", offset = 0, count = 100):
  """
  Full-text search over archive items.

  Runs a MySQL ``MATCH ... AGAINST`` query on the ``title``, ``description``
  and ``text`` columns of ``archive_item`` and returns the matching
  ArchiveItem objects, each with the relevance value stored in ``item.score``.

  query   -- search string; escaped with MySQLdb.escape_string before use
  orderBy -- SQL ORDER BY expression, interpolated into the statement as-is.
             NOTE(review): this value is NOT escaped, so callers must never
             pass user-supplied text here.
  offset  -- number of leading matches to skip
  count   -- maximum number of items to return

  Returns a list of ArchiveItem instances. The list is empty when ``offset``
  lies at or beyond the end of the result set.
  """
  query = MySQLdb.escape_string(query)
  match = "MATCH (title, description, text) AGAINST ('%s')" % query
  # The MATCH expression is used twice: once as the score column and once
  # as the WHERE filter.
  sql = ("SELECT id, %s AS score FROM archive_item\n"
         "WHERE %s ORDER BY %s") % (match, match, orderBy)
  matches = ArchiveItem._connection.queryAll(sql)
  # Paging is done in Python on the full result set. Slicing never raises on
  # out-of-range bounds, so an offset past the last match now yields an empty
  # page (previously the offset was ignored and the first page came back).
  page = matches[offset:][:count]
  result = []
  for row in page:
    item = ArchiveItem.get(row[0])
    # score is a plain attribute, only meaningful for this search result
    item.score = row[1]
    result.append(item)
  return result
class ArchiveItem(SQLObject):
  """
  A single item harvested from a remote archive.

  Rows are stored in the ``archive_item`` table. Besides the columns below,
  the class offers a few computed attributes (``year``, ``json``) and keeps
  ``hashId`` in sync with archive name, author and title on update().
  """
  hashId = UnicodeCol(alternateID = True, length=128)
  archiveItemId = UnicodeCol()
  icon = UnicodeCol() # -> url (128x128)
  title = UnicodeCol()
  titleSort = UnicodeCol(default = '')
  author = UnicodeCol()
  authorSort = UnicodeCol(default = '')
  description = UnicodeCol() # text(for rss)
  html = UnicodeCol() #(for page, contains javascript)
  text = UnicodeCol() #Fulltext
  relDate = DateTimeCol() #timestamp (item released)
  pubDate = DateTimeCol() #timestamp (item published)
  modDate = DateTimeCol() #timestamp (item modified)
  archiveUrl = UnicodeCol() # -> url (link to archive page)
  downloadUrl = UnicodeCol() # -> url (link to item)
  size = IntCol() #bytes
  rights = IntCol(default = 5) #-> int: 0 (free) - 5 (unfree)
  itemType = UnicodeCol() #string (Text, Pictures, Music, Movies, Software)
  genre = UnicodeCol(default = '')
  archive = ForeignKey('Archive')
  created = DateTimeCol(default=datetime.now)

  #score is only available if loaded via queryArchive
  score = -1

  #Fulltext search
  #ALTER TABLE archive_item ADD FULLTEXT (title, description, text);

  def _set_author(self, value):
    """Store the author; seed authorSort with it while authorSort is empty."""
    self._SO_set_author(value)
    if not self.authorSort:
      self.authorSort = value

  def _get_year(self):
    """Return the release year of the item as a four-digit string."""
    return self.relDate.strftime('%Y')

  def _get_json(self):
    """
    Return the jsonify_sqlobject() dict for this item, with ``relDate`` and
    ``pubDate`` replaced by Unix timestamps rendered as strings.
    """
    result = jsonify_sqlobject(self)
    # strftime('%s') is a platform-specific extension; time.mktime gives the
    # same local-time epoch value portably and matches
    # Archive._get_pubDateTimestamp.
    result['relDate'] = str(int(time.mktime(self.relDate.timetuple())))
    result['pubDate'] = str(int(time.mktime(self.pubDate.timetuple())))
    return result

  def update(self, data):
    """
    Assign every key/value pair of ``data`` as an attribute of this item,
    then recompute the hash id.
    """
    for key in data:
      setattr(self, key, data[key])
    self.updateHashID()

  def updateHashID(self):
    """
    Recompute ``hashId`` as the md5 hex digest of
    "<archive name>/<author>/<title>".
    """
    salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title)
    # md5 works on bytes: a unicode salt with non-ASCII characters would fail
    # on the implicit ASCII encoding, so encode explicitly. For pure-ASCII
    # values the digest is unchanged.
    if not isinstance(salt, str):
      salt = salt.encode('utf-8')
    # The column is named hashId; assigning to "hashID" only created an
    # ordinary Python attribute and never reached the database.
    self.hashId = md5.new(salt).hexdigest()
class Archive(SQLObject):
  """
  A remote archive that is polled for items.

  ``archiveUrl`` is the endpoint that is queried both for the list of item
  ids (update_url) and for the data of a single item (data_url).
  """
  archiveName = UnicodeCol(alternateID = True, length = 1000)
  archiveUrl = UnicodeCol()
  archiveType = UnicodeCol(default=u'')
  # integer default for an integer column (was the string "15")
  ttl = IntCol(default = 15)
  pubDate = DateTimeCol(default=datetime.now)
  modDate = DateTimeCol(default=datetime.now)
  created = DateTimeCol(default=datetime.now)

  def _get_pubDateTimestamp(self):
    """Return pubDate as an integer Unix timestamp (local time)."""
    return int(time.mktime(self.pubDate.timetuple()))

  def _query_url(self, query):
    """
    Build "<archiveUrl>?key=value&..." from the dict ``query``.

    Values are converted to text and percent-quoted; keys are used as-is.
    """
    pairs = []
    for key in query:
      value = "%s" % query[key]
      # urllib.quote cannot handle unicode with non-ASCII characters, so
      # hand it UTF-8 bytes. Plain byte strings pass through untouched.
      if not isinstance(value, str):
        value = value.encode('utf-8')
      pairs.append("%s=%s" % (key, quote(value)))
    return "%s?" % self.archiveUrl + "&".join(pairs)

  def _get_update_url(self):
    """
    Return the URL that lists item ids for this archive.

    NOTE(review): the request parameter is called modDate but carries the
    pubDate timestamp -- confirm this is intended.
    """
    return self._query_url({'modDate': self.pubDateTimestamp})

  def data_url(self, id):
    """Return the URL from which the data of item ``id`` is loaded."""
    return self._query_url({'id': id})

  def update(self):
    """
    Synchronise the items of this archive.

    Reads the JSON at update_url, takes its 'items' list (empty if missing)
    and, for every id in it, loads the item data and either imports it as a
    new ArchiveItem or updates the existing one for this archive.
    """
    result = simplejson.loads(read_url(self.update_url))
    items = result.get('items', [])
    for itemId in items:
      data = jsonLoadArchiveItem(read_url(self.data_url(itemId)))
      q = ArchiveItem.select(AND(
        ArchiveItem.q.archiveItemId == itemId,
        ArchiveItem.q.archiveID == self.id))
      if q.count() == 0:
        jsonImportArchiveItem(self, itemId, data)
      else:
        q[0].update(data)
class SortName(SQLObject):
  """Table of unique names (up to 1000 characters), looked up by ``name``."""
  name = UnicodeCol(alternateID=True, length=1000)
# identity models.
class Visit(SQLObject):
  """
  Row of the ``visit`` table: a unique visit key together with its creation
  and expiry timestamps.
  """
  class sqlmeta:
    table = "visit"

  visit_key = StringCol(alternateID=True,
                        alternateMethodName="by_visit_key",
                        length=40)
  created = DateTimeCol(default=datetime.now)
  expiry = DateTimeCol()

  @classmethod
  def lookup_visit(cls, visit_key):
    """Return the Visit stored under ``visit_key``, or None if there is none."""
    try:
      visit = cls.by_visit_key(visit_key)
    except SQLObjectNotFound:
      visit = None
    return visit
class VisitIdentity(SQLObject):
  """Associates a unique visit key with the integer id of a user."""
  visit_key = StringCol(alternateID=True,
                        alternateMethodName="by_visit_key",
                        length=40)
  user_id = IntCol()
class Group(SQLObject):
  """
  Minimal group model: a unique short name, a display name and a creation
  timestamp, joined to users and permissions.
  """
  # "Group" (like "Order" and "User") is a reserved word in SQL, so the
  # table gets an explicit, SQL-safe name.
  class sqlmeta:
    table = "tg_group"

  group_name = UnicodeCol(alternateID=True,
                          alternateMethodName="by_group_name",
                          length=16)
  display_name = UnicodeCol(length=255)
  created = DateTimeCol(default=datetime.now)

  # every user that is a member of this group
  users = RelatedJoin("User",
                      joinColumn="group_id",
                      otherColumn="user_id",
                      intermediateTable="user_group")

  # every permission granted to this group
  permissions = RelatedJoin("Permission",
                            intermediateTable="group_permission",
                            joinColumn="group_id",
                            otherColumn="permission_id")
class User(SQLObject):
  """
  Basic user account: login name, e-mail address, display name and password,
  plus the groups the user belongs to.
  """
  # "User" (like "Group" and "Order") is a reserved word in SQL, so the
  # table gets an explicit, SQL-safe name.
  class sqlmeta:
    table = "tg_user"

  user_name = UnicodeCol(alternateID=True,
                         alternateMethodName="by_user_name",
                         length=16)
  email_address = UnicodeCol(alternateID=True,
                             alternateMethodName="by_email_address",
                             length=255)
  display_name = UnicodeCol(length=255)
  password = UnicodeCol(length=40)
  created = DateTimeCol(default=datetime.now)

  # the groups this user is a member of
  groups = RelatedJoin("Group",
                       joinColumn="user_id",
                       otherColumn="group_id",
                       intermediateTable="user_group")

  def _get_permissions(self):
    """Return the set of all permissions of all groups of this user."""
    collected = set()
    for group in self.groups:
      collected.update(group.permissions)
    return collected

  def _set_password(self, cleartext_password):
    """Encrypt ``cleartext_password`` via the identity module, then store it."""
    encrypted = identity.encrypt_password(cleartext_password)
    self._SO_set_password(encrypted)

  def set_password_raw(self, password):
    """Store ``password`` exactly as given, without encrypting it."""
    self._SO_set_password(password)
class Permission(SQLObject):
  """A named permission with a description, joined to the groups holding it."""
  permission_name = UnicodeCol(alternateID=True,
                               alternateMethodName="by_permission_name",
                               length=16)
  description = UnicodeCol(length=255)

  # every group this permission has been granted to
  groups = RelatedJoin("Group",
                       joinColumn="permission_id",
                       otherColumn="group_id",
                       intermediateTable="group_permission")