From 6524ceea8a776e04ab1b0ae91508f7b051b70b9a Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 20:00:30 +0530
Subject: [PATCH 1/8] annotation cleanup
---
README | 3 ++-
pandora/annotation/models.py | 25 ++++++++++++------------
pandora/annotation/utils.py | 38 +++++++++---------------------------
pandora/item/utils.py | 2 +-
pandora/padma.jsonc | 2 +-
pandora/user/models.py | 1 +
requirements.txt | 1 +
7 files changed, 27 insertions(+), 45 deletions(-)
diff --git a/README b/README
index 47765696..2cd6efee 100644
--- a/README
+++ b/README
@@ -12,7 +12,8 @@ python, bazaar, pip and virtualenv and several other python modules:
apt-get install bzr git subversion mercurial \
python-setuptools python-pip python-virtualenv ipython \
python-dev python-imaging python-numpy python-psycopg2 \
- python-geoip postgresql rabbitmq-server
+ python-geoip python-html5lib python-lxml \
+ postgresql rabbitmq-server
apt-get install oxframe oxtimeline
* Pan.do/ra
diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index 009c03ff..9c5088ad 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -34,6 +34,7 @@ class Annotation(models.Model):
layer = models.CharField(max_length=255, db_index=True)
value = models.TextField()
+ findvalue = models.TextField()
sortvalue = models.CharField(max_length=1000, null=True, blank=True, db_index=True)
def editable(self, user):
@@ -44,22 +45,25 @@ class Annotation(models.Model):
return True
return False
- def html(self):
- if self.layer == 'string':
- return utils.html_parser(self.value)
- else:
- return self.value
-
def set_public_id(self):
if self.id:
public_id = Annotation.objects.filter(item=self.item, id__lt=self.id).count() + 1
self.public_id = "%s/%s" % (self.item.itemId, ox.toAZ(public_id))
Annotation.objects.filter(id=self.id).update(public_id=self.public_id)
+ def get_layer(self):
+ for layer in settings.CONFIG['layers']:
+ if layer['id'] == self.layer:
+ return layer
+ return {}
+
def save(self, *args, **kwargs):
set_public_id = not self.id or not self.public_id
+ layer = self.get_layer()
if self.value:
- sortvalue = ox.stripTags(self.value).strip()
+ self.value = utils.cleanup_value(self.value, self.layer['tyoe'])
+ self.findvalue = ox.stripTags(self.value).strip()
+ sortvalue = self.findvalue
sortvalue = sort_string(sortvalue)
if sortvalue:
self.sortvalue = sortvalue[:1000]
@@ -69,12 +73,7 @@ class Annotation(models.Model):
self.sortvalue = None
#no clip or update clip
- def get_layer(id):
- for l in settings.CONFIG['layers']:
- if l['id'] == id:
- return l
- return {}
- private = get_layer(self.layer).get('private', False)
+ private = layer.get('private', False)
if not private:
if not self.clip or self.start != self.clip.start or self.end != self.clip.end:
self.clip, created = Clip.get_or_create(self.item, self.start, self.end)
diff --git a/pandora/annotation/utils.py b/pandora/annotation/utils.py
index 79ace73d..95328d7a 100644
--- a/pandora/annotation/utils.py
+++ b/pandora/annotation/utils.py
@@ -2,37 +2,17 @@
# ci:si:et:sw=4:sts=4:ts=4
import re
import ox
+import html5lib
-def html_parser(text, nofollow=True):
- text = text.replace('', '__i__').replace('', '__/i__')
- text = text.replace('', '__b__').replace('', '__/b__')
- #truns links into wiki links, make sure to only take http links
- text = re.sub('(.*?)', '[\\1 \\2]', text)
- text = ox.escape(text)
- text = text.replace('__i__', '').replace('__/i__', '')
- text = text.replace('__b__', '').replace('__/b__', '')
- if nofollow:
- nofollow_rel = ' rel="nofollow"'
+def cleanup_value(value, layer_type):
+ #FIXME: what about other types? location etc
+ if layer_type == 'text':
+ value = sanitize_fragment(value)
else:
- nofollow_rel = ''
+ value = ox.stripTags(value)
+ return value
- links = re.compile('(\[(http.*?) (.*?)\])').findall(text)
- for t, link, txt in links:
- link = link.replace('http', '__LINK__').replace('.', '__DOT__')
- ll = '%s' % (link, nofollow_rel, txt)
- text = text.replace(t, ll)
- links = re.compile('(\[(http.*?)\])').findall(text)
- for t, link in links:
- link = link.replace('http', '__LINK__').replace('.', '__DOT__')
- ll = '%s' % (link, nofollow_rel, link)
- text = text.replace(t, ll)
+def sanitize_fragment(html):
+ return html5lib.parseFragment(html).toxml()
- text = ox.urlize(text, nofollow=nofollow)
-
- #inpage links
- text = re.sub('\[(/.+?) (.+?)\]', '\\2', text)
-
- text = text.replace('__LINK__', 'http').replace('__DOT__', '.')
- text = text.replace("\n", '
')
- return text
diff --git a/pandora/item/utils.py b/pandora/item/utils.py
index f72a1bbf..c7c851b4 100644
--- a/pandora/item/utils.py
+++ b/pandora/item/utils.py
@@ -44,7 +44,7 @@ def sort_string(string):
#pad numbered titles
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string)
- return unicodedata.normalize('NFKD', string)
+ return unicodedata.normalize('NFKD', string).lower()
def sort_title(title):
diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc
index afccd3a3..aa33c76f 100644
--- a/pandora/padma.jsonc
+++ b/pandora/padma.jsonc
@@ -415,7 +415,7 @@
"id": "keywords",
"title": "Keywords",
"overlap": true,
- "type": "text"
+ "type": "string"
},
{
"id": "descriptions",
diff --git a/pandora/user/models.py b/pandora/user/models.py
index 99a5d4bb..11dcac9f 100644
--- a/pandora/user/models.py
+++ b/pandora/user/models.py
@@ -187,6 +187,7 @@ def user_post_save(sender, instance, **kwargs):
profile, new = UserProfile.objects.get_or_create(user=instance)
if new and instance.is_superuser:
profile.level = len(settings.CONFIG['userLevels']) - 1
+ profile.newsletter = settings.CONFIG['user']['newsletter']
profile.save()
SessionData.objects.filter(user=instance).update(level=profile.level,
username=instance.username)
diff --git a/requirements.txt b/requirements.txt
index b37bc72b..66b3ab6e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,3 +12,4 @@ django-celery>2.1.1
-e git://github.com/bit/django-extensions.git#egg=django_extensions
-e git+git://github.com/dcramer/django-devserver#egg=django_devserver
gunicorn
+html5lib
From 926d9a352ee12610ba3d741e2d82423d78e37d8d Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 20:04:36 +0530
Subject: [PATCH 2/8] fix typo in layer type key ('tyoe' -> 'type')
---
pandora/annotation/models.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index 9c5088ad..6383c3d7 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -61,7 +61,7 @@ class Annotation(models.Model):
set_public_id = not self.id or not self.public_id
layer = self.get_layer()
if self.value:
- self.value = utils.cleanup_value(self.value, self.layer['tyoe'])
+ self.value = utils.cleanup_value(self.value, self.layer['type'])
self.findvalue = ox.stripTags(self.value).strip()
sortvalue = self.findvalue
sortvalue = sort_string(sortvalue)
From e673e26c5373d6f931136e931ab8d69ffa22dfaa Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 15:43:04 +0100
Subject: [PATCH 3/8] fix unicode issues
---
pandora/annotation/models.py | 5 ++---
pandora/annotation/utils.py | 2 +-
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py
index 6383c3d7..55cb04f4 100644
--- a/pandora/annotation/models.py
+++ b/pandora/annotation/models.py
@@ -61,10 +61,9 @@ class Annotation(models.Model):
set_public_id = not self.id or not self.public_id
layer = self.get_layer()
if self.value:
- self.value = utils.cleanup_value(self.value, self.layer['type'])
+ self.value = utils.cleanup_value(self.value, layer['type'])
self.findvalue = ox.stripTags(self.value).strip()
- sortvalue = self.findvalue
- sortvalue = sort_string(sortvalue)
+ sortvalue = sort_string(self.findvalue)
if sortvalue:
self.sortvalue = sortvalue[:1000]
else:
diff --git a/pandora/annotation/utils.py b/pandora/annotation/utils.py
index 95328d7a..b7a3980f 100644
--- a/pandora/annotation/utils.py
+++ b/pandora/annotation/utils.py
@@ -14,5 +14,5 @@ def cleanup_value(value, layer_type):
return value
def sanitize_fragment(html):
- return html5lib.parseFragment(html).toxml()
+ return html5lib.parseFragment(html).toxml().decode('utf-8')
From 28a577d69f2eb4824cc9a51054ff2e09c5ae176a Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 21:05:39 +0530
Subject: [PATCH 4/8] use annotation find value
---
pandora/annotation/managers.py | 1 +
pandora/clip/managers.py | 4 ++--
pandora/clip/views.py | 2 +-
pandora/item/utils.py | 2 +-
4 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/pandora/annotation/managers.py b/pandora/annotation/managers.py
index bffd71ac..d156b803 100644
--- a/pandora/annotation/managers.py
+++ b/pandora/annotation/managers.py
@@ -26,6 +26,7 @@ def parseCondition(condition, user):
'in': 'start',
'out': 'end',
'id': 'public_id',
+ 'value': 'findvalue',
}.get(k, k)
if not k:
k = 'name'
diff --git a/pandora/clip/managers.py b/pandora/clip/managers.py
index 4a748751..513036b0 100644
--- a/pandora/clip/managers.py
+++ b/pandora/clip/managers.py
@@ -27,7 +27,7 @@ def parseCondition(condition, user):
'in': 'start',
'out': 'end',
'place': 'annotations__places__id',
- 'text': 'annotations__value',
+ 'text': 'annotations__findvalue',
'user': 'annotations__user__username',
}.get(k, k)
if not k:
@@ -40,7 +40,7 @@ def parseCondition(condition, user):
for l in filter(lambda l: not l.get('private', False),
settings.CONFIG['layers'])]
if k in public_layers:
- return parseCondition({'key': 'annotations__value',
+ return parseCondition({'key': 'annotations__findvalue',
'value': v,
'operator': op}, user) \
& parseCondition({'key': 'annotations__layer',
diff --git a/pandora/clip/views.py b/pandora/clip/views.py
index 39f27d69..f6c2f04c 100644
--- a/pandora/clip/views.py
+++ b/pandora/clip/views.py
@@ -48,7 +48,7 @@ def order_query(qs, sort):
if key.startswith('clip:'):
key = e['key'][len('clip:'):]
key = {
- 'text': 'annotations__value',
+ 'text': 'annotations__sortvalue',
'position': 'start',
}.get(key, key)
elif key not in clip_keys:
diff --git a/pandora/item/utils.py b/pandora/item/utils.py
index c7c851b4..f72a1bbf 100644
--- a/pandora/item/utils.py
+++ b/pandora/item/utils.py
@@ -44,7 +44,7 @@ def sort_string(string):
#pad numbered titles
string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string)
- return unicodedata.normalize('NFKD', string).lower()
+ return unicodedata.normalize('NFKD', string)
def sort_title(title):
From d070cb616b3954b30c2d23f7f1c97795bef55752 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Tue, 27 Dec 2011 00:06:58 +0530
Subject: [PATCH 5/8] pad ids to len 7 for storage
---
pandora/clip/views.py | 4 ++--
pandora/item/models.py | 1 +
pandora/itemlist/models.py | 2 +-
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/pandora/clip/views.py b/pandora/clip/views.py
index f6c2f04c..53e768a5 100644
--- a/pandora/clip/views.py
+++ b/pandora/clip/views.py
@@ -111,7 +111,7 @@ def findClips(request):
Annotation.objects.filter(layer=layer, clip__in=ids))
elif 'position' in query:
qs = order_query(qs, query['sort'])
- ids = [i.public_id for i in qs]
+ ids = [i['public_id'] for i in qs.values('public_id')]
data['conditions'] = data['conditions'] + {
'value': data['position'],
'key': query['sort'][0]['key'],
@@ -123,7 +123,7 @@ def findClips(request):
response['data']['position'] = utils.get_positions(ids, [qs[0].itemId])[0]
elif 'positions' in data:
qs = order_query(qs, query['sort'])
- ids = [i.public_id for i in qs]
+ ids = [i['public_id'] for i in qs.values('public_id')]
response['data']['positions'] = utils.get_positions(ids, data['positions'])
else:
response['data']['items'] = qs.count()
diff --git a/pandora/item/models.py b/pandora/item/models.py
index 22f6d25f..f70fbd2d 100644
--- a/pandora/item/models.py
+++ b/pandora/item/models.py
@@ -772,6 +772,7 @@ class Item(models.Model):
def path(self, name=''):
h = self.itemId
+ h = (7-len(h))*'0' + h
return os.path.join('items', h[:2], h[2:4], h[4:6], h[6:], name)
'''
diff --git a/pandora/itemlist/models.py b/pandora/itemlist/models.py
index 4764a45c..837efce0 100644
--- a/pandora/itemlist/models.py
+++ b/pandora/itemlist/models.py
@@ -122,7 +122,7 @@ class List(models.Model):
return response
def path(self, name=''):
- h = "%06d" % self.id
+ h = "%07d" % self.id
return os.path.join('lists', h[:2], h[2:4], h[4:6], h[6:], name)
def update_icon(self):
From 3cd321a1169a73e3d21e2959098604ccd70eb27f Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 20:13:41 +0100
Subject: [PATCH 6/8] add filter of layer values (i.e. keywords)
---
pandora/item/models.py | 19 ++++++++++++++++++-
pandora/padma.jsonc | 37 ++++---------------------------------
2 files changed, 22 insertions(+), 34 deletions(-)
diff --git a/pandora/item/models.py b/pandora/item/models.py
index f70fbd2d..a9ce4174 100644
--- a/pandora/item/models.py
+++ b/pandora/item/models.py
@@ -541,7 +541,7 @@ class Item(models.Model):
'\n'.join([f.path for f in self.files.all()]))
elif key['type'] == 'layer':
qs = Annotation.objects.filter(layer=i, item=self).order_by('start')
- save(i, '\n'.join([l.value for l in qs]))
+ save(i, '\n'.join([l.findvalue for l in qs]))
elif i != '*' and i not in self.facet_keys:
value = self.get(i)
if isinstance(value, list):
@@ -733,6 +733,22 @@ class Item(models.Model):
#update cached values in clips
self.clips.all().update(director=s.director, title=s.title)
+ def update_layer_facets(self):
+ filters = [f['id'] for f in settings.CONFIG['filters']]
+ for layer in settings.CONFIG['layers']:
+ if layer['id'] in filters:
+ key = layer['id']
+ current_values = [a['value']
+ for a in self.annotations.filter(layer=key).distinct().values('value')]
+ saved_values = [i.value for i in Facet.objects.filter(item=self, key=key)]
+ removed_values = filter(lambda i: i not in current_values, saved_values)
+ if removed_values:
+ Facet.objects.filter(item=self, key=key, value__in=removed_values).delete()
+ for value in current_values:
+ if value not in saved_values:
+ sortvalue = value
+ Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue)
+
def update_facets(self):
for key in self.facet_keys + ['title']:
current_values = self.get(key, [])
@@ -769,6 +785,7 @@ class Item(models.Model):
if key in self.person_keys + ['name']:
sortvalue = get_name_sort(value)
Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue)
+ self.update_layer_facets()
def path(self, name=''):
h = self.itemId
diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc
index aa33c76f..346b61d7 100644
--- a/pandora/padma.jsonc
+++ b/pandora/padma.jsonc
@@ -49,12 +49,7 @@
{"id": "year", "title": "Year", "type": "integer"},
{"id": "language", "title": "Language", "type": "string"},
{"id": "category", "title": "Category", "type": "string"},
- {"id": "writer", "title": "Writer", "type": "string"},
- {"id": "producer", "title": "Producer", "type": "string"},
- {"id": "cinematographer", "title": "Cinematographer", "type": "string"},
- {"id": "editor", "title": "Editor", "type": "string"},
- {"id": "actor", "title": "Actor", "type": "string"},
- {"id": "keyword", "title": "Keyword", "type": "string"}
+ {"id": "keywords", "title": "Keyword", "type": "string"}
],
/*
An itemKey must have the following properties:
@@ -174,30 +169,6 @@
"filter": true,
"find": true
},
- {
- "id": "genre",
- "title": "Genre",
- "type": ["string"],
- "autocomplete": true,
- "columnWidth": 120,
- "filter": true,
- "find": true
- },
- {
- "id": "keyword",
- "title": "Keyword",
- "type": ["string"],
- "autocomplete": true,
- "filter": true,
- "find": true
- },
- {
- "id": "numberofkeywords",
- "title": "Number of Keywords",
- "type": "integer",
- "columnWidth": 60,
- "value": {"key": "keyword", "type": "length"}
- },
{
"id": "description",
"title": "Description",
@@ -503,10 +474,10 @@
}
},
"filters": [
- {"id": "collection", "sort": [{"key": "name", "operator": "-"}]},
- {"id": "source", "sort": [{"key": "items", "operator": "-"}]},
+ {"id": "collection", "sort": [{"key": "name", "operator": "+"}]},
+ {"id": "source", "sort": [{"key": "name", "operator": "+"}]},
{"id": "category", "sort": [{"key": "items", "operator": "-"}]},
- {"id": "keyword", "sort": [{"key": "items", "operator": "-"}]},
+ {"id": "keywords", "sort": [{"key": "items", "operator": "-"}]},
{"id": "location", "sort": [{"key": "items", "operator": "-"}]}
],
"filtersSize": 176,
From ae556c62f0a31d092b30858811c8c654e1bfaf0c Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 20:21:20 +0100
Subject: [PATCH 7/8] only add facet if it does not already exist
---
pandora/item/models.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pandora/item/models.py b/pandora/item/models.py
index a9ce4174..47bf5d39 100644
--- a/pandora/item/models.py
+++ b/pandora/item/models.py
@@ -747,7 +747,7 @@ class Item(models.Model):
for value in current_values:
if value not in saved_values:
sortvalue = value
- Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue)
+ Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue)
def update_facets(self):
for key in self.facet_keys + ['title']:
From e3b948f08f351ee8d49ecef69b51e054fbf4be43 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 26 Dec 2011 21:30:21 +0100
Subject: [PATCH 8/8] use facets if possible for layer values
---
pandora/item/managers.py | 8 ++++++--
pandora/item/models.py | 2 +-
pandora/padma.jsonc | 4 ++--
static/js/pandora/placesDialog.js | 5 ++++-
4 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/pandora/item/managers.py b/pandora/item/managers.py
index 40b18ba0..56d4088c 100644
--- a/pandora/item/managers.py
+++ b/pandora/item/managers.py
@@ -39,6 +39,10 @@ def parseCondition(condition, user):
else:
exclude = False
+ facet_keys = models.Item.facet_keys + ['title']
+ for f in settings.CONFIG['filters']:
+ if f['id'] not in facet_keys:
+ facet_keys.append(f['id'])
key_type = settings.CONFIG['keys'].get(k, {'type':'string'}).get('type')
if isinstance(key_type, list):
key_type = key_type[0]
@@ -48,8 +52,8 @@ def parseCondition(condition, user):
'text': 'string',
'year': 'string',
'length': 'string',
- 'list': 'list',
'layer': 'string',
+ 'list': 'list',
}.get(key_type, key_type)
if k == 'list':
key_type = ''
@@ -95,7 +99,7 @@ def parseCondition(condition, user):
value_key = 'find__value'
else:
value_key = k
- if k in models.Item.facet_keys + ['title']:
+ if k in facet_keys:
in_find = False
facet_value = 'facets__value%s' % {
'==': '__iexact',
diff --git a/pandora/item/models.py b/pandora/item/models.py
index 47bf5d39..8cc97910 100644
--- a/pandora/item/models.py
+++ b/pandora/item/models.py
@@ -541,7 +541,7 @@ class Item(models.Model):
'\n'.join([f.path for f in self.files.all()]))
elif key['type'] == 'layer':
qs = Annotation.objects.filter(layer=i, item=self).order_by('start')
- save(i, '\n'.join([l.findvalue for l in qs]))
+ save(i, u'\n'.join([l.findvalue for l in qs]))
elif i != '*' and i not in self.facet_keys:
value = self.get(i)
if isinstance(value, list):
diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc
index 346b61d7..5f353ef1 100644
--- a/pandora/padma.jsonc
+++ b/pandora/padma.jsonc
@@ -45,7 +45,7 @@
{"id": "collection", "title": "Collection", "type": "string"},
{"id": "source", "title": "Source", "type": "string"},
{"id": "director", "title": "Director", "type": "string"},
- {"id": "location", "title": "Location", "type": "string"},
+ {"id": "locations", "title": "Location", "type": "string"},
{"id": "year", "title": "Year", "type": "integer"},
{"id": "language", "title": "Language", "type": "string"},
{"id": "category", "title": "Category", "type": "string"},
@@ -478,7 +478,7 @@
{"id": "source", "sort": [{"key": "name", "operator": "+"}]},
{"id": "category", "sort": [{"key": "items", "operator": "-"}]},
{"id": "keywords", "sort": [{"key": "items", "operator": "-"}]},
- {"id": "location", "sort": [{"key": "items", "operator": "-"}]}
+ {"id": "locations", "sort": [{"key": "items", "operator": "-"}]}
],
"filtersSize": 176,
"find": {"conditions": [], "operator": "&"},
diff --git a/static/js/pandora/placesDialog.js b/static/js/pandora/placesDialog.js
index fa40663b..c7b6d2e7 100644
--- a/static/js/pandora/placesDialog.js
+++ b/static/js/pandora/placesDialog.js
@@ -30,7 +30,10 @@ pandora.ui.placesDialog = function() {
pandora.api.findClips({
query: {
conditions: names.map(function(name) {
- return {key: 'subtitles', value: name, operator: '='};
+ //FIXME: this should be more generic
+ return Ox.getObjectById(pandora.site.layers, 'subtitles')
+ ? {key: 'subtitles', value: name, operator: '='}
+ : {key: 'locations', value: name, operator: '=='};
}),
operator: names.length == 1 ? '&' : '|'
}