- spider can read archives now

- items are indexed and queryArchive sort of works;
  items get a score element
- port some sort / session things from oxdb
- transparent png reflections
This commit is contained in:
j 2007-03-28 21:26:58 +00:00
parent d4c2fe794f
commit 0d3592374d
8 changed files with 408 additions and 65 deletions

View file

@ -18,7 +18,7 @@ server.socket_port=2323
server.environment="development"
autoreload.package="oilarchive"
# session_filter.on = True
session_filter.on = True
# Set to True if you'd like to abort execution if a controller gets an
# unexpected parameter. False by default

View file

@ -6,10 +6,13 @@ from turbogears import controllers, expose, validate, error_handler
from model import *
from turbogears import identity, redirect
from cherrypy import request, response
import cherrypy
# import logging
# log = logging.getLogger("oilarchive.controllers")
# from oilarchive import json
from oilarchive import json
import oilcache
from forms import forms
from sortname import sortname
@ -20,8 +23,12 @@ class View:
return dict(item = item)
def icon(self, item):
response.headerMap['Content-Type'] = "image/png"
return oilcache.icon(item)
response.headerMap['Content-Type'] = "image/jpeg"
return oilcache.loadIcon(item)
def icon_reflection(self, item):
    """Serve the cached reflection image of item's icon.

    The reflection is rendered by oilcache as an RGBA PNG (it relies on
    the alpha channel), so it is announced as PNG even though the public
    URL ends in ``icon_reflection.jpg``.
    """
    response.headerMap['Content-Type'] = "image/png"
    return oilcache.loadIconReflection(item)
@expose()
def default(self, id, *args, **kw):
@ -29,8 +36,10 @@ class View:
item = ArchiveItem.byHashId(id)
if not args:
return self.view(item)
elif args[0] == 'icon.png':
elif args[0] == 'icon.jpg':
return self.icon(item)
elif args[0] == 'icon_reflection.jpg':
return self.icon_reflection(item)
elif args[0] == 'json':
return item.json
@ -96,13 +105,6 @@ class Root(controllers.RootController):
admin = Admin()
api = Api()
@expose(template=".templates.welcome")
# @identity.require(identity.in_group("admin"))
def index(self):
import time
# log.debug("Happy TurboGears Controller Responding For Duty")
return dict(now=time.ctime())
@expose(template=".templates.login")
def login(self, forward_url=None, previous_url=None, *args, **kw):
if not identity.current.anonymous \
@ -132,3 +134,98 @@ class Root(controllers.RootController):
def logout(self):
identity.current.logout()
raise redirect("/")
def default_search_values(self):
    """Return a fresh dict holding the initial search state.

    It is used as the fallback when the session has no stored 'search'
    entry yet.
    """
    return {
        'q': '',
        'f': 'all',
        's': 'title',
        'o': 0,
        'n': 60,
        'l': 'all',
        'v': 'icon',
        'length': 0,
    }
# Maps a sort key as given in the request to the name that get_sort() and
# search() resolve it to. Keys that are missing from this table are passed
# through unchanged by both callers.
# NOTE(review): targets such as 'imdb' or 'director_html' do not match any
# ArchiveItem column visible in this file -- presumably carried over from
# the oxdb port; confirm before relying on them.
_sort_map = {
'id': 'imdb',
'director': 'director_html',
'writer': 'writer_html',
'language': 'language_html',
'releasedate': 'release_date',
'cast': 'cast_html',
'genre': 'genre_html',
'keywords': 'keywords_html',
'connections': 'connections_sort',
'title': 'title_sort',
'country': 'country_html',
'producer': 'producer_html',
'summary': 'plot',
'trivia': 'plot',
'date': 'latest_file_date',
'year': 'release_date',
}
def get_sort(self, s):
    """Translate a requested sort key into an order-by name.

    The key is looked up in _sort_map (unknown keys are kept as they are)
    and converted to str. 'release_date', 'size' and 'pub_date' get a
    leading '-' prefix.
    """
    column = str(self._sort_map.get(s, s))
    prefixed = ('release_date', 'size', 'pub_date')
    if column in prefixed:
        return '-%s' % column
    return column
# Maps a field name to the SQLObject column expression on ArchiveItem.
# Not referenced by any code visible in this file.
_field_map = {
'title': ArchiveItem.q.title,
'author': ArchiveItem.q.author,
}
# First translation applied by search() to the requested sort key (with any
# leading '-' removed) before the result is looked up in _sort_map.
# NOTE(review): 'script', 'title' and 'director' all map to 'year' --
# confirm this is intended.
_search_map = {
'summary': 'plot',
'trivia': 'plot',
'releasedate': 'release_date',
'script': 'year',
'title': 'year',
'director': 'year'
}
@expose(template=".templates.iconview")
def search(self, q = '', f = None, s = None, o = -1, n = None, l = None, v = None):
    """Run a full-text search and render one page of results.

    Every parameter except q is sticky: when it is omitted, the value
    stored in the session under 'search' is reused, falling back to
    default_search_values() for a fresh session.

    q -- query string handed to queryArchive(); empty means no results
    s -- sort key, resolved through _search_map and _sort_map
    o -- offset of the first item on the page; a negative value means
         "use the remembered offset"
    n -- number of items per page
    v -- view; 'list' and 'quote' pick their own templates, anything
         else renders the icon view
    f, l -- stored in the search state and passed to the template; they
            are not interpreted here

    Returns the template dict with the page of items, the resolved sort
    name, the search state (including the total 'length') and
    tg_template.
    """
    search = cherrypy.session.get('search', self.default_search_values())
    if not v:
        v = search['v']
    if not l:
        l = search['l']
    if not n:
        n = search['n']
    if not s:
        s = search['s']
    if not f:
        f = search['f']
    n = int(n)
    o = int(o)
    if o < 0:
        # Also catches '-1' arriving as a string from the URL, which would
        # otherwise produce a negative slice start below.
        o = int(search['o'])
    search = dict(q = q, f = f, s = s, o = o, n = n, l = l, v = v)

    tg_template = ".templates.iconview"
    if v == 'list':
        tg_template = ".templates.listview"
    if v == 'quote':
        tg_template = ".templates.quoteview"

    items = []
    if q:
        items = queryArchive(q)
        # Sorting is not applied yet; the disabled SQLObject variant was:
        # orderBy = [self.get_sort(s), 'title_sort', 'title']
        # items = ArchiveItems.select(LIKE(ArchiveItems.q.text, '%' + q + '%'), orderBy = orderBy)

    sort = s
    if sort.startswith('-'):
        sort = sort[1:]
    sort = self._search_map.get(sort, sort)
    sort = self._sort_map.get(sort, sort)

    # queryArchive() returns a list; a SQLObject select result has count().
    if isinstance(items, list):
        search['length'] = len(items)
    else:
        search['length'] = items.count()
    cherrypy.session['search'] = search
    return dict(items = items[o:o+n], sort = sort, search = search, tg_template = tg_template)
@expose(template=".templates.listview")
# @identity.require(identity.in_group("admin"))
def default(self, hashID = '', **args):
    """Catch-all handler.

    A first path segment of exactly 32 characters is passed to
    self.view() together with the remaining keyword arguments; every
    other request is forwarded to search().
    """
    is_hash = hashID and len(hashID) == 32
    if not is_hash:
        return self.search(**args)
    return self.view(hashID, args)

View file

@ -3,61 +3,112 @@
# vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
import time
from urllib import quote
import md5
from turbogears.database import PackageHub
from sqlobject import *
from turbogears import identity
from scrapeit import read_url
from turbojson.jsonify import jsonify_sqlobject
import MySQLdb
from sqlobject import *
from scrapeit.utils import read_url
import simplejson
from oilspider import jsonLoadArchiveItem, jsonPrepareArchiveItem, jsonImportArchiveItem
from oilspider import jsonLoadArchiveItem, jsonImportArchiveItem
hub = PackageHub("oilarchive")
__connection__ = hub
def queryArchive(query, orderBy="score", offset = 0, count = 100):
    """Full-text search over archive_item (title, description, text).

    query   -- search string; escaped with MySQLdb.escape_string before
               it is placed into the MATCH ... AGAINST clause
    orderBy -- SQL ORDER BY expression. It is inserted into the statement
               verbatim, so it must never come from user input.
    offset  -- number of leading matches to skip
    count   -- maximum number of items to return

    Returns a list of ArchiveItem objects, each with its ``score``
    attribute set to the match score reported by the database. An offset
    at or beyond the number of matches yields an empty list.
    """
    query = MySQLdb.escape_string(query)
    match = "MATCH (title, description, text) AGAINST ('%s')" % query
    sql = ("SELECT id, %s AS score FROM archive_item "
           "WHERE %s ORDER BY %s") % (match, match, orderBy)
    matches = ArchiveItem._connection.queryAll(sql)
    result = []
    # One slice does the paging; the old length checks left the rows
    # unsliced when offset was past the end, returning the first page again.
    for row in matches[offset:offset + count]:
        item = ArchiveItem.get(row[0])
        item.score = row[1]
        result.append(item)
    return result
class ArchiveItem(SQLObject):
hashId = UnicodeCol(alternateID = True, length=128)
archiveId = UnicodeCol()
archiveItemId = UnicodeCol()
icon = UnicodeCol() # -> url (128x128)
title = UnicodeCol()
description = UnicodeCol()
titleSort = UnicodeCol(default = '')
author = UnicodeCol()
authorSort = UnicodeCol(default = '')
description = UnicodeCol() # text(for rss)
html = UnicodeCol() #(for page, contains javascript)
text = UnicodeCol() #Fulltext
url = UnicodeCol()
downloadURL = UnicodeCol()
icon = UnicodeCol()
releaseDate = DateTimeCol()
pubDate = DateTimeCol()
size = IntCol()
rights = IntCol() #-> int: 0 (free) - 5 (unfree)
archiveName = UnicodeCol()
archiveType = UnicodeCol()
relDate = DateTimeCol() #timestamp (item released)
pubDate = DateTimeCol() #timestamp (item published)
modDate = DateTimeCol() #timestamp (item published)
archiveUrl = UnicodeCol() # -> url (link to archive page)
downloadUrl = UnicodeCol() # -> url (link to item)
size = IntCol() #bytes
rights = IntCol(default = 5) #-> int: 0 (free) - 5 (unfree)
itemType = UnicodeCol() #string (Text, Pictures, Music, Movies, Software)
genre = UnicodeCol(default = '')
archive = ForeignKey('Archive')
created = DateTimeCol(default=datetime.now)
#score is only available if loaded via queryArchive
score = -1
#Fulltext search
#ALTER TABLE archive_item ADD FULLTEXT (title, description, text);
def _set_author(self, value):
self._SO_set_author(value)
if not self.author_sort:
self.author_sort = value
if not self.authorSort:
self.authorSort = value
def _get_year(self):
return self.releaseDate.strftime('%Y')
return self.relDate.strftime('%Y')
def _get_json(self):
result = jsonify_sqlobject(self)
result['relDate'] = self.relDate.strftime('%s')
result['pubDate'] = self.pubDate.strftime('%s')
return result
'''
return dict(
title = self.title,
description = self.description,
html = self.html,
text = self.text,
author = self.author,
url = self.url,
icon = '/view/%s/icon' % self.hash,
releaseDate = self.releaseDate,
pubDate = self.pubDate,
archiveUrl = self.archiveUrl,
downloadUrl = self.downloadUrl,
size = self.size,
icon = '/view/%s/icon.png' % self.hash,
relDate = self.relDate.strftime('%s'),
pubDate = self.pubDate.strftime('%s'),
size = self.size,
)
'''
def update(self, data):
for key in data:
setattr(self, key, values[key])
setattr(self, key, data[key])
self.updateHashID()
def updateHashID(self):
    """Recompute the item's hash from archive name, author and title.

    The result is stored in the ``hashId`` column. The previous spelling,
    ``hashID``, only created a plain instance attribute, so the column
    was never updated.
    """
    salt = '%s/%s/%s' % (self.archive.archiveName, self.author, self.title)
    if isinstance(salt, unicode):
        # md5 needs bytes; the implicit ASCII encoding fails on non-ASCII text.
        salt = salt.encode('utf-8')
    self.hashId = md5.new(salt).hexdigest()
class Archive(SQLObject):
@ -66,27 +117,33 @@ class Archive(SQLObject):
archiveType = UnicodeCol(default=u'')
ttl = IntCol(default = "15")
pubDate = DateTimeCol(default=datetime.now)
modDate = DateTimeCol(default=datetime.now)
created = DateTimeCol(default=datetime.now)
def _get_pubDateTimestamp(self):
return time.mktime(self.pubDate.timetuple())
return int(time.mktime(self.pubDate.timetuple()))
def _query_url(self, query):
url = "%s?" % self.archiveUrl
url += "&".join(["%s=%s" % (key, quote("%s" % query[key])) for key in query])
return url
def _get_update_url(self):
return "%s?pubDate=%s" % (self.archiveUrl, self.pubDateTimestamp)
return self._query_url({'modDate': self.pubDateTimestamp})
def data_url(self, id):
return "%s?id=%s" % (self.archiveUrl, id)
return self._query_url({'id': id})
def update(self):
result = simplejson.loads(read_url(self.update_url))
for id in result:
items = result.get('items', [])
for id in items:
data = jsonLoadArchiveItem(read_url(self.data_url(id)))
q = ArchiveItem.select(AND(
ArchiveItem.q.ArchiveId == id,
ArchiveItem.q.ArchiveName == self.ArchiveName))
ArchiveItem.q.archiveItemId == id,
ArchiveItem.q.archiveID == self.id))
if q.count() == 0:
data = jsonPrepareArchiveItem(id, data)
jsonImportArchiveItem(data)
jsonImportArchiveItem(self, id, data)
else:
q[0].update(data)

View file

@ -3,33 +3,85 @@
# vi:si:et:sw=2:sts=2:ts=2
import os
from os.path import abspath, exists, join
from os.path import abspath, exists, join, dirname
import Image
from scrapeit.utils import read_url
# Root of the on-disk cache: a 'cache' directory next to this module.
# abspath(__file__) is the module file itself, so its dirname is needed;
# joining onto the file path would put the cache "inside" a regular file
# and os.makedirs() would fail.
cache_root = join(dirname(abspath(__file__)), 'cache')
# File extension used for every cached image.
img_extension = "png"
def load_file(f_name):
def loadFile(f_name):
f = open(f_name)
data = f.read()
f.close()
return data
def save_file(f_name, data):
def saveFile(f_name, data):
f = open(f_name, 'w')
f.write(data)
f.close()
def icon(item):
icon_root = join(cache_root, 'icon')
if not exists(icon_root):
os.makedirs(icon_root)
icon = join(icon_root, "%s.png" % item.hashId)
def imgName(hashId):
    """Return the cache-relative image name for hashId.

    The name consists of a directory level made of the first 16
    characters of the hash, followed by the full hash plus the image
    extension.
    """
    fileName = "%s.%s" % (hashId, img_extension)
    return "/".join((hashId[:16], fileName))
def iconPath(iconType, item):
    """Return the cache path of item's image for the given iconType.

    The containing directory is created if it does not exist yet.
    """
    path = join(cache_root, iconType, imgName(item.hashId))
    folder = dirname(path)
    if not exists(folder):
        os.makedirs(folder)
    return path
def _writeReflection(sourceFile, targetFile, height = 0.5, opacity = 0.25):
    """Render a fading mirror image of sourceFile into targetFile.

    sourceFile -- image to reflect
    targetFile -- output file; the format must support RGBA, i.e. png
    height     -- reflection height as a fraction of the source height;
                  values above 1 are capped at the full source height
    opacity    -- alpha of the first (topmost) reflection row as a
                  fraction of fully opaque; it fades linearly towards 0
    """
    sourceImage = Image.open(sourceFile).convert('RGB')
    sourceWidth, sourceHeight = sourceImage.size
    # Cap at the source height: rows above the top edge do not exist.
    targetHeight = min(int(round(sourceHeight * height)), sourceHeight)
    targetImage = Image.new('RGBA', (sourceWidth, targetHeight))
    for y in range(0, targetHeight):
        alpha = int(255 * (targetHeight - y) * opacity / targetHeight)
        # Row y of the reflection mirrors the y-th row from the bottom.
        sourceY = sourceHeight - 1 - y
        for x in range(0, sourceWidth):
            color = sourceImage.getpixel((x, sourceY)) + (alpha, )
            targetImage.putpixel((x, y), color)
    # The PNG writer's option is 'optimize'; 'optimized' was silently ignored.
    targetImage.save(targetFile, optimize = True)
'''
return icon data, reads from remote url if not cached
'''
def loadIcon(item):
icon = iconPath('icon', item)
if exists(icon):
data = laod_file(icon)
data = loadFile(icon)
else:
data = read_url(item.icon)
save_file(icon, data)
saveFile(icon, data)
return data
def loadIconReflection(item):
    """Return the reflection image data for item's icon.

    The reflection is rendered on first use. If the icon itself is not
    cached yet, it is fetched through loadIcon() first. Returns '' when
    the icon is still unavailable afterwards.
    """
    iconFile = iconPath('icon', item)
    reflectionFile = iconPath('iconReflection', item)
    if not exists(reflectionFile):
        if not exists(iconFile):
            # loadIcon() downloads and caches the icon. The old code called
            # the local path string `icon` here, which raised TypeError.
            loadIcon(item)
        if not exists(iconFile):
            return ''
        _writeReflection(iconFile, reflectionFile)
    return loadFile(reflectionFile)

View file

@ -2,23 +2,48 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=2:sts=2:ts=2
from datetime import datetime
import time
import simplejson
from model import *
import model
import md5
def jsonLoadArchiveItem(data):
json_array = simplejson.loads(data)
for key in ('releaseDate', 'pubDate'):
json_array.pop('tg_flash', None)
for key in ('relDate', 'pubDate', 'modDate'):
json_array[key] = datetime.utcfromtimestamp(float(json_array[key]))
for key in ('rights', 'size'):
json_array[key] = int(json_array[key])
def jsonPrepareArchiveItem(sid, json_array):
json_array['archiveId'] = sid
json_array['itemType'] = json_array.pop('type', 'Text')
return json_array
def jsonImportArchiveItem(archiveId, json_array):
json_array = jsonPrepareArchiveItem(archiveId, json_array)
ArchiveItem( **json_array)
def jsonImportArchiveItem(archive, archiveItemId, json_array):
    """Create an ArchiveItem in ``archive`` from decoded item data.

    archive       -- the Archive the item belongs to
    archiveItemId -- the item's id within that archive; stored as a string
    json_array    -- dict as produced by jsonLoadArchiveItem(), or the raw
                     JSON string, which is decoded first

    The item's hashId is the md5 hex digest of
    "<archive name>/<author>/<title>".

    Returns the newly created model.ArchiveItem.
    """
    if isinstance(json_array, basestring):
        json_array = jsonLoadArchiveItem(json_array)
    salt = '%s/%s/%s' % (archive.archiveName, json_array['author'], json_array['title'])
    if isinstance(salt, unicode):
        # md5 needs bytes; the implicit ASCII encoding fails on non-ASCII text.
        salt = salt.encode('utf-8')
    hashID = md5.new(salt).hexdigest()
    return model.ArchiveItem(
        archiveID = archive.id,
        hashId = hashID,
        archiveItemId = "%s" % archiveItemId,
        description = json_array['description'],
        rights = json_array['rights'],
        text = json_array['text'],
        author = json_array['author'],
        pubDate = json_array['pubDate'],
        relDate = json_array['relDate'],
        modDate = json_array['modDate'],
        archiveUrl = json_array['archiveUrl'],
        downloadUrl = json_array['downloadUrl'],
        html = json_array['html'],
        genre = json_array['genre'],
        title = json_array['title'],
        size = json_array['size'],
        itemType = json_array['itemType'],
        icon = json_array['icon'],
    )

View file

@ -0,0 +1,22 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:py="http://purl.org/kid/ns#" py:extends="'master.kid'">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" py:replace="''"/>
<title>Oil of the 21st Century Archive</title>
</head>
<body>
<div style="width: 816px; margin-left: auto; margin-right: auto; margin-top: 96px; margin-bottom: 64px">
<div py:for="item in items" class="icon">
<div id="iconPoster${item.hashId}" class="iconPoster">
<a href="/view/${item.hashId}">
<img class="iconImage" src="/view/${item.hashId}/icon.jpg" onMouseOver="mouseOverIcon('${item.hashId}')" onMouseOut="mouseOutIcon('${item.hashId}')" />
</a>
</div>
<div id="iconText${item.hashId}" class="iconText textSmall" style="background: url(/view/${item.hashId}/icon_reflection.jpg); background-position: center top; background-repeat: no-repeat;">
<span class="textBold">${XML(item.iconTitle)}</span><br /><span class="textGrey">${item.getPreview(sort)}</span>
</div>
</div>
</div>
<div style="height: 64px;"></div>
</body>
</html>

View file

@ -0,0 +1,41 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:py="http://purl.org/kid/ns#" py:extends="'master.kid'">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" py:replace="''"/>
<title>Oil of the 21st Century Archive</title>
</head>
<body>
<div style="height: 96px"></div>
<!--<div class="centerDiv" style="width: 808px">-->
<!--<div class="centerDiv" style="width: 808px; height: 1px; margin-left: 4px; margin-right: 4px; margin-top: 0px; margin-bottom: 0px; background: rgb(32, 32, 32)"></div>-->
<div py:for="i, item in enumerate(items)" id="listItem${item.hashId}" class="centerDiv ${i%2 and 'odd' or 'even'}" style="width: 808px; height: 24px">
<a href="/view/${item.hashId}" class="linkElement" onMouseOver="mouseOverItem('${item.hashId}')" onMouseOut="mouseOutItem('${item.hashId}', '${i%2 and 'odd' or 'even'}')">
<div id="itemTitle${item.hashId}" class="inlineDiv textLarge" style="width: 396px; height: 16px; padding: 4px; text-align: left">
<span class="linkElement">${item.listTitle}</span>
</div>
<div id="itemDirector${item.hashId}" class="inlineDiv textLarge" style="width: 264px; height: 16px; padding: 4px; text-align: left">
${item.listDirector}
</div>
<div id="itemInfo${item.hashId}" class="inlineDiv" style="width: 124px; height: 16px; padding: 4px; text-align: right">
<div class="textMedium textGrey" style="margin-top: 1px">${item.getPreview(sort)}</div>
</div>
</a>
<!--<div class="inlineDiv" style="width: 808px; height: 1px; margin-left: 4px; margin-right: 4px; margin-top: 0px; margin-bottom: 0px; background: rgb(32, 32, 32)"></div>-->
</div>
<!--</div>-->
<!--
<table class="centerDiv" style="width: 808px">
<a py:for="item in items" href="/${item.hashId}" id="listItem${item.hashId}" style="width: 808px; background: rgb(0, 0, 0)" onMouseOver="mouseOverItem('${item.hashId}')" onMouseOut="mouseOutItem('${item.hashId}')">
<tr style="width: 808px; margin-bottom: 8px; background: rgb(0, 0, 0)">
<td class="textLarge" style="width: 400px; height: 24px; white-space: nowrap; overflow: hidden">${item.listTitle}</td>
<td style="width: 8px"></td>
<td class="textLarge" style="width: 264px; height: 24px; white-space: nowrap; overflow: hidden">${item.listDirector}</td>
<td style="width: 8px"></td>
<td class="textMedium textGrey" style="width: 128px; height: 24px; text-align: right; white-space: nowrap; overflow: hidden">${item.getField(sort)}</td>
</tr>
</a>
</table>
-->
<div style="height: 64px"></div>
</body>
</html>

View file

@ -0,0 +1,49 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:py="http://purl.org/kid/ns#" py:extends="'master.kid'">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" py:replace="''"/>
<title>Oil of the 21st Century Archive</title>
</head>
<body>
<div style="height: 96px"></div>
<!--<div class="centerDiv" style="width: 808px">-->
<!--<div class="centerDiv" style="width: 808px; height: 1px; margin-left: 4px; margin-right: 4px; margin-top: 0px; margin-bottom: 0px; background: rgb(32, 32, 32)"></div>-->
<div py:for="i, item in enumerate(items)" id="listItem${item.hashId}" class="centerDiv ${i%2 and 'odd' or 'even'}" style="width: 808px; height: 24px">
<a href="/view/${item.hashId}" class="linkElement" onMouseOver="mouseOverItem('${item.hashId}')" onMouseOut="mouseOutItem('${item.hashId}', '${i%2 and 'odd' or 'even'}')">
<div id="itemTitle${item.hashId}" class="inlineDiv textLarge" style="width: 396px; height: 16px; padding: 4px; text-align: left">
<span class="linkElement">${item.listTitle}</span>
<ul>
<li py:for="q in item.quotes(search['q'])">
<img src="${q['frame']}" class="quote_frame" align="left" />
<span class="quote_timestamp">${q['start'][0:-4]} - ${q['stop'][0:-4]}</span><br />
<span class="quote_text">${XML(q['quote'])}</span><br clear="all" />
</li>
</ul>
</div>
<div id="itemDirector${item.hashId}" class="inlineDiv textLarge" style="width: 264px; height: 16px; padding: 4px; text-align: left">
${item.listDirector}
</div>
<div id="itemInfo${item.hashId}" class="inlineDiv" style="width: 124px; height: 16px; padding: 4px; text-align: right">
<div class="textMedium textGrey" style="margin-top: 1px">${item.getPreview(sort)}</div>
</div>
</a>
<!--<div class="inlineDiv" style="width: 808px; height: 1px; margin-left: 4px; margin-right: 4px; margin-top: 0px; margin-bottom: 0px; background: rgb(32, 32, 32)"></div>-->
</div>
<!--</div>-->
<!--
<table class="centerDiv" style="width: 808px">
<a py:for="item in items" href="/view/${item.hashId}" id="listItem${item.hashId}" style="width: 808px; background: rgb(0, 0, 0)" onMouseOver="mouseOverItem('${item.hashId}')" onMouseOut="mouseOutItem('${item.hashId}')">
<tr style="width: 808px; margin-bottom: 8px; background: rgb(0, 0, 0)">
<td class="textLarge" style="width: 400px; height: 24px; white-space: nowrap; overflow: hidden">${item.listTitle}</td>
<td style="width: 8px"></td>
<td class="textLarge" style="width: 264px; height: 24px; white-space: nowrap; overflow: hidden">${item.listDirector}</td>
<td style="width: 8px"></td>
<td class="textMedium textGrey" style="width: 128px; height: 24px; text-align: right; white-space: nowrap; overflow: hidden">${item.getField(sort)}</td>
</tr>
</a>
</table>
-->
<div style="height: 64px"></div>
</body>
</html>