From 0d3592374d178f10db2f0dabbbdd905e8a9b88cc Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Wed, 28 Mar 2007 21:26:58 +0000 Subject: [PATCH] - spider can read archives now - items are indexed and queryArchive sort of works items get a score element - port some sort / session things from oxdb - transparent png reflections --- dev.cfg | 2 +- oilarchive/controllers.py | 119 ++++++++++++++++++++++++++--- oilarchive/model.py | 119 +++++++++++++++++++++-------- oilarchive/oilcache.py | 78 +++++++++++++++---- oilarchive/oilspider.py | 43 ++++++++--- oilarchive/templates/iconview.kid | 22 ++++++ oilarchive/templates/listview.kid | 41 ++++++++++ oilarchive/templates/quoteview.kid | 49 ++++++++++++ 8 files changed, 408 insertions(+), 65 deletions(-) create mode 100644 oilarchive/templates/iconview.kid create mode 100644 oilarchive/templates/listview.kid create mode 100644 oilarchive/templates/quoteview.kid diff --git a/dev.cfg b/dev.cfg index 2792fe5..4bb6ed5 100644 --- a/dev.cfg +++ b/dev.cfg @@ -18,7 +18,7 @@ server.socket_port=2323 server.environment="development" autoreload.package="oilarchive" -# session_filter.on = True +session_filter.on = True # Set to True if you'd like to abort execution if a controller gets an # unexpected parameter. 
False by default diff --git a/oilarchive/controllers.py b/oilarchive/controllers.py index 97fe1bd..c99bfd1 100644 --- a/oilarchive/controllers.py +++ b/oilarchive/controllers.py @@ -6,10 +6,13 @@ from turbogears import controllers, expose, validate, error_handler from model import * from turbogears import identity, redirect from cherrypy import request, response +import cherrypy + # import logging # log = logging.getLogger("oilarchive.controllers") -# from oilarchive import json +from oilarchive import json + import oilcache from forms import forms from sortname import sortname @@ -20,8 +23,12 @@ class View: return dict(item = item) def icon(self, item): - response.headerMap['Content-Type'] = "image/png" - return oilcache.icon(item) + response.headerMap['Content-Type'] = "image/jpeg" + return oilcache.loadIcon(item) + + def icon_reflection(self, item): + response.headerMap['Content-Type'] = "image/jpeg" + return oilcache.loadIconReflection(item) @expose() def default(self, id, *args, **kw): @@ -29,8 +36,10 @@ class View: item = ArchiveItem.byHashId(id) if not args: return self.view(item) - elif args[0] == 'icon.png': + elif args[0] == 'icon.jpg': return self.icon(item) + elif args[0] == 'icon_reflection.jpg': + return self.icon_reflection(item) elif args[0] == 'json': return item.json @@ -96,13 +105,6 @@ class Root(controllers.RootController): admin = Admin() api = Api() - @expose(template=".templates.welcome") - # @identity.require(identity.in_group("admin")) - def index(self): - import time - # log.debug("Happy TurboGears Controller Responding For Duty") - return dict(now=time.ctime()) - @expose(template=".templates.login") def login(self, forward_url=None, previous_url=None, *args, **kw): if not identity.current.anonymous \ @@ -132,3 +134,98 @@ class Root(controllers.RootController): def logout(self): identity.current.logout() raise redirect("/") + + def default_search_values(self): + return dict(q = '', f = 'all', s = 'title', o = 0, n = 60, l = 'all', v = 
'icon', length = 0) + + _sort_map = { + 'id': 'imdb', + 'director': 'director_html', + 'writer': 'writer_html', + 'language': 'language_html', + 'releasedate': 'release_date', + 'cast': 'cast_html', + 'genre': 'genre_html', + 'keywords': 'keywords_html', + 'connections': 'connections_sort', + 'title': 'title_sort', + 'country': 'country_html', + 'producer': 'producer_html', + 'summary': 'plot', + 'trivia': 'plot', + 'date': 'latest_file_date', + 'year': 'release_date', + } + + def get_sort(self, s): + s = str(self._sort_map.get(s, s)) + if s in ('release_date', 'size', 'pub_date'): + s = '-%s' % s + return s + + _field_map = { + 'title': ArchiveItem.q.title, + 'author': ArchiveItem.q.author, + } + + _search_map = { + 'summary': 'plot', + 'trivia': 'plot', + 'releasedate': 'release_date', + 'script': 'year', + 'title': 'year', + 'director': 'year' + } + + @expose(template=".templates.iconview") + def search(self, q = '', f = None, s = None, o = -1, n = None, l = None, v = None): + search = cherrypy.session.get('search', self.default_search_values()) + if not v: + v = search['v'] + if not l: + l = search['l'] + if not n: + n = search['n'] + if o == -1: + o = search['o'] + if not s: + s = search['s'] + if not f: + f = search['f'] + + o = int(o) + n = int(n) + + search = dict(q = q, f = f, s = s, o = o, n = n, l = l, v = v) + + tg_template = ".templates.iconview" + if v == 'list': + tg_template = ".templates.listview" + if v == 'quote': + tg_template = ".templates.quoteview" + + orderBy = [self.get_sort(s), 'title_sort', 'title'] + items = [] + if q: + items = queryArchive(q) + //items = ArchiveItems.select(LIKE(ArchiveItems.q.text, '%' + q + '%'), orderBy = orderBy) + sort = s + if sort.startswith('-'): + sort = sort[1:] + sort = self._search_map.get(sort, sort) + sort = self._sort_map.get(sort, sort) + print sort + if type(items) == list: + search['length'] = len(items) + else: + search['length'] = items.count() + cherrypy.session['search'] = search + return 
dict(items = items[o:o+n], sort = sort, search = search, tg_template = tg_template) + + @expose(template=".templates.listview") + # @identity.require(identity.in_group("admin")) + def default(self, hashID = '', **args): + if hashID and len(hashID) == 32: + return self.view(hashID, args) + return self.search(**args) + diff --git a/oilarchive/model.py b/oilarchive/model.py index dad14e1..5a908c5 100644 --- a/oilarchive/model.py +++ b/oilarchive/model.py @@ -3,61 +3,112 @@ # vi:si:et:sw=2:sts=2:ts=2 from datetime import datetime +import time +from urllib import quote +import md5 from turbogears.database import PackageHub -from sqlobject import * from turbogears import identity -from scrapeit import read_url +from turbojson.jsonify import jsonify_sqlobject +import MySQLdb +from sqlobject import * + +from scrapeit.utils import read_url import simplejson -from oilspider import jsonLoadArchiveItem, jsonPrepareArchiveItem, jsonImportArchiveItem +from oilspider import jsonLoadArchiveItem, jsonImportArchiveItem hub = PackageHub("oilarchive") __connection__ = hub +def queryArchive(query, orderBy="score", offset = 0, count = 100): + query = MySQLdb.escape_string(query) + match = "MATCH (title, description, text) AGAINST ('%s')" % query + sql = """SELECT id, %s AS score FROM archive_item +WHERE %s ORDER BY %s""" % \ + (match, match, orderBy) #, offset, count) + result = [] + matches = ArchiveItem._connection.queryAll(sql) + if len(matches) > offset: + matches = matches[offset:] + if len(matches) > count: + matches = matches[:count] + for m in matches: + item = ArchiveItem.get(m[0]) + item.score = m[1] + result.append(item) + return result + class ArchiveItem(SQLObject): hashId = UnicodeCol(alternateID = True, length=128) - archiveId = UnicodeCol() + archiveItemId = UnicodeCol() + icon = UnicodeCol() # -> url (128x128) title = UnicodeCol() - description = UnicodeCol() + titleSort = UnicodeCol(default = '') author = UnicodeCol() authorSort = UnicodeCol(default = '') + description 
= UnicodeCol() # text(for rss) + html = UnicodeCol() #(for page, contains javascript) text = UnicodeCol() #Fulltext - url = UnicodeCol() - downloadURL = UnicodeCol() - icon = UnicodeCol() - releaseDate = DateTimeCol() - pubDate = DateTimeCol() - size = IntCol() - rights = IntCol() #-> int: 0 (free) - 5 (unfree) - archiveName = UnicodeCol() - archiveType = UnicodeCol() + relDate = DateTimeCol() #timestamp (item released) + pubDate = DateTimeCol() #timestamp (item published) + modDate = DateTimeCol() #timestamp (item published) + archiveUrl = UnicodeCol() # -> url (link to archive page) + downloadUrl = UnicodeCol() # -> url (link to item) + size = IntCol() #bytes + rights = IntCol(default = 5) #-> int: 0 (free) - 5 (unfree) + itemType = UnicodeCol() #string (Text, Pictures, Music, Movies, Software) + genre = UnicodeCol(default = '') + + archive = ForeignKey('Archive') created = DateTimeCol(default=datetime.now) + #score is only available if loaded via queryArchive + score = -1 + + #Fulltext search + #ALTER TABLE archive_item ADD FULLTEXT (title, description, text); + + def _set_author(self, value): self._SO_set_author(value) - if not self.author_sort: - self.author_sort = value - + if not self.authorSort: + self.authorSort = value + def _get_year(self): - return self.releaseDate.strftime('%Y') + return self.relDate.strftime('%Y') def _get_json(self): + result = jsonify_sqlobject(self) + result['relDate'] = self.relDate.strftime('%s') + result['pubDate'] = self.pubDate.strftime('%s') + return result + ''' return dict( title = self.title, description = self.description, + html = self.html, + text = self.text, author = self.author, - url = self.url, - icon = '/view/%s/icon' % self.hash, - releaseDate = self.releaseDate, - pubDate = self.pubDate, + archiveUrl = self.archiveUrl, + downloadUrl = self.downloadUrl, + size = self.size, + icon = '/view/%s/icon.png' % self.hash, + relDate = self.relDate.strftime('%s'), + pubDate = self.pubDate.strftime('%s'), size = self.size, 
) + ''' def update(self, data): for key in data: - setattr(self, key, values[key]) + setattr(self, key, data[key]) + self.updateHashID() + + def updateHashID(self): + salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title) + self.hashID = md5.new(salt).hexdigest() class Archive(SQLObject): @@ -66,27 +117,33 @@ class Archive(SQLObject): archiveType = UnicodeCol(default=u'') ttl = IntCol(default = "15") pubDate = DateTimeCol(default=datetime.now) + modDate = DateTimeCol(default=datetime.now) created = DateTimeCol(default=datetime.now) def _get_pubDateTimestamp(self): - return time.mktime(self.pubDate.timetuple()) + return int(time.mktime(self.pubDate.timetuple())) + def _query_url(self, query): + url = "%s?" % self.archiveUrl + url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query]) + return url + def _get_update_url(self): - return "%s?pubDate=%s" % (self.archiveUrl, self.pubDateTimestamp) + return self._query_url({'modDate': self.pubDateTimestamp}) def data_url(self, id): - return "%s?id=%s" % (self.archiveUrl, id) + return self._query_url({'id': id}) def update(self): result = simplejson.loads(read_url(self.update_url)) - for id in result: + items = result.get('items', []) + for id in items: data = jsonLoadArchiveItem(read_url(self.data_url(id))) q = ArchiveItem.select(AND( - ArchiveItem.q.ArchiveId == id, - ArchiveItem.q.ArchiveName == self.ArchiveName)) + ArchiveItem.q.archiveItemId == id, + ArchiveItem.q.archiveID == self.id)) if q.count() == 0: - data = jsonPrepareArchiveItem(id, data) - jsonImportArchiveItem(data) + jsonImportArchiveItem(self, id, data) else: q[0].update(data) diff --git a/oilarchive/oilcache.py b/oilarchive/oilcache.py index 46b0f28..9be845d 100644 --- a/oilarchive/oilcache.py +++ b/oilarchive/oilcache.py @@ -3,33 +3,85 @@ # vi:si:et:sw=2:sts=2:ts=2 import os -from os.path import abspath, exists, join +from os.path import abspath, exists, join, dirname +import Image from scrapeit.utils import read_url 
cache_root = join(abspath(__file__), 'cache') +img_extension = "png" - -def load_file(f_name): +def loadFile(f_name): f = open(f_name) data = f.read() f.close() return data - -def save_file(f_name, data): + +def saveFile(f_name, data): f = open(f_name, 'w') f.write(data) f.close() - -def icon(item): - icon_root = join(cache_root, 'icon') - if not exists(icon_root): - os.makedirs(icon_root) - icon = join(icon_root, "%s.png" % item.hashId) + +''' + returns name including a possible directory level for a given hash +''' +def imgName(hashId): + return "%s/%s.%s" % (hashId[:16], hashId, img_extension) + +''' + returns path to an icon from iconType for given icon in the cache +''' +def iconPath(iconType, item): + icon_root = join(cache_root, iconType) + icon = join(icon_root, imgName(item.hashId)) + if not exists(dirname(icon)): + os.makedirs(dirname(icon)) + return icon + +''' + render reflection of sourceFile on targetFile, + uses alpha, target files needs to support RGBA, i.e. png +''' +def _writeReflection(sourceFile, targetFile, height = 0.5, opacity = 0.25): + sourceImage = Image.open(sourceFile).convert('RGB') + sourceSource = sourceImage.size[0] + sourceHeight = sourceImage.size[1] + + targetWidth = sourceImage.size[0] + targetHeight = int(round(sourceHeight * height)) + targetImage = Image.new('RGBA', (targetWidth, targetHeight)) + + for y in range(0, targetHeight): + brightness = int(255 * (targetHeight - y) * opacity / targetHeight) + for x in range(0, targetWidth): + targetColor = sourceImage.getpixel((x, sourceHeight - 1 - y)) + targetColor += (brightness, ) + targetImage.putpixel((x, y), targetColor) + targetImage.save(targetFile, optimized = True) + +''' + return icon data, reads from remote url if not cached +''' +def loadIcon(item): + icon = iconPath('icon', item) if exists(icon): - data = laod_file(icon) + data = loadFile(icon) else: data = read_url(item.icon) - save_file(icon, data) + saveFile(icon, data) return data +''' + return icon reflection data, 
renders reflection if it does not exists +''' +def loadIconReflection(item): + icon = iconPath('icon', item) + iconReflection = iconPath('iconReflection', item) + if not exists(iconReflection): + if not exists(icon): + icon(item) + if exists(icon): + _writeReflection(icon, iconReflection) + else: + return '' + return loadFile(iconReflection) diff --git a/oilarchive/oilspider.py b/oilarchive/oilspider.py index 59ff67c..ed8e64c 100644 --- a/oilarchive/oilspider.py +++ b/oilarchive/oilspider.py @@ -2,23 +2,48 @@ # -*- coding: utf-8 -*- # vi:si:et:sw=2:sts=2:ts=2 +from datetime import datetime +import time + import simplejson -from model import * - +import model +import md5 def jsonLoadArchiveItem(data): json_array = simplejson.loads(data) - for key in ('releaseDate', 'pubDate'): + json_array.pop('tg_flash', None) + for key in ('relDate', 'pubDate', 'modDate'): json_array[key] = datetime.utcfromtimestamp(float(json_array[key])) for key in ('rights', 'size'): json_array[key] = int(json_array[key]) - -def jsonPrepareArchiveItem(sid, json_array): - json_array['archiveId'] = sid + json_array['itemType'] = json_array.pop('type', 'Text') return json_array -def jsonImportArchiveItem(archiveId, json_array): - json_array = jsonPrepareArchiveItem(archiveId, json_array) - ArchiveItem( **json_array) + +def jsonImportArchiveItem(archive, archiveItemId, json_array): + if isinstance(json_array, basestring): + json_array = jsonLoadArchiveItem(json_array) + salt = '%s/%s/%s' % (archive.archiveName, json_array['author'], json_array['title']) + hashID = md5.new(salt).hexdigest() + i = model.ArchiveItem( + archiveID=archive.id, + hashId = hashID, + archiveItemId = "%s" % archiveItemId, + description=json_array['description'], + rights=json_array['rights'], + text=json_array['text'], + author=json_array['author'], + pubDate=json_array['pubDate'], + relDate=json_array['relDate'], + modDate=json_array['modDate'], + archiveUrl=json_array['archiveUrl'], + downloadUrl=json_array['downloadUrl'], 
+ html=json_array['html'], + genre=json_array['genre'], + title=json_array['title'], + size=json_array['size'], + itemType=json_array['itemType'], + icon= json_array['icon'] + ) diff --git a/oilarchive/templates/iconview.kid b/oilarchive/templates/iconview.kid new file mode 100644 index 0000000..136fbb3 --- /dev/null +++ b/oilarchive/templates/iconview.kid @@ -0,0 +1,22 @@ + + +
+ +