From 6524ceea8a776e04ab1b0ae91508f7b051b70b9a Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 20:00:30 +0530 Subject: [PATCH 1/8] annotation cleanup --- README | 3 ++- pandora/annotation/models.py | 25 ++++++++++++------------ pandora/annotation/utils.py | 38 +++++++++--------------------------- pandora/item/utils.py | 2 +- pandora/padma.jsonc | 2 +- pandora/user/models.py | 1 + requirements.txt | 1 + 7 files changed, 27 insertions(+), 45 deletions(-) diff --git a/README b/README index 47765696..2cd6efee 100644 --- a/README +++ b/README @@ -12,7 +12,8 @@ python, bazaar, pip and virtualenv and several other python modules: apt-get install bzr git subversion mercurial \ python-setuptools python-pip python-virtualenv ipython \ python-dev python-imaging python-numpy python-psycopg2 \ - python-geoip postgresql rabbitmq-server + python-geoip python-html5lib python-lxml \ + postgresql rabbitmq-server apt-get install oxframe oxtimeline * Pan.do/ra diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 009c03ff..9c5088ad 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -34,6 +34,7 @@ class Annotation(models.Model): layer = models.CharField(max_length=255, db_index=True) value = models.TextField() + findvalue = models.TextField() sortvalue = models.CharField(max_length=1000, null=True, blank=True, db_index=True) def editable(self, user): @@ -44,22 +45,25 @@ class Annotation(models.Model): return True return False - def html(self): - if self.layer == 'string': - return utils.html_parser(self.value) - else: - return self.value - def set_public_id(self): if self.id: public_id = Annotation.objects.filter(item=self.item, id__lt=self.id).count() + 1 self.public_id = "%s/%s" % (self.item.itemId, ox.toAZ(public_id)) Annotation.objects.filter(id=self.id).update(public_id=self.public_id) + def get_layer(self): + for layer in settings.CONFIG['layers']: + if layer['id'] == self.layer: + return layer + return {} + def save(self, *args, **kwargs): set_public_id = not self.id or not self.public_id + layer = self.get_layer() if self.value: - sortvalue = ox.stripTags(self.value).strip() + self.value = utils.cleanup_value(self.value, self.layer['tyoe']) + self.findvalue = ox.stripTags(self.value).strip() + sortvalue = self.findvalue sortvalue = sort_string(sortvalue) if sortvalue: self.sortvalue = sortvalue[:1000] @@ -69,12 +73,7 @@ class Annotation(models.Model): self.sortvalue = None #no clip or update clip - def get_layer(id): - for l in settings.CONFIG['layers']: - if l['id'] == id: - return l - return {} - private = get_layer(self.layer).get('private', False) + private = layer.get('private', False) if not private: if not self.clip or self.start != self.clip.start or self.end != self.clip.end: self.clip, created = Clip.get_or_create(self.item, self.start, self.end) diff --git a/pandora/annotation/utils.py b/pandora/annotation/utils.py index 79ace73d..95328d7a 100644 --- a/pandora/annotation/utils.py +++ b/pandora/annotation/utils.py @@ -2,37 +2,17 @@ # ci:si:et:sw=4:sts=4:ts=4 import re import ox +import html5lib -def html_parser(text, nofollow=True): - text = text.replace('', '__i__').replace('', '__/i__') - text = text.replace('', '__b__').replace('', '__/b__') - #truns links into wiki links, make sure to only take http links - text = re.sub('(.*?)', '[\\1 \\2]', text) - text = ox.escape(text) - text = text.replace('__i__', '').replace('__/i__', '') - text = text.replace('__b__', '').replace('__/b__', '') - if nofollow: - nofollow_rel = ' rel="nofollow"' +def cleanup_value(value, layer_type): + #FIXME: what about other types? location etc + if layer_type == 'text': + value = sanitize_fragment(value) else: - nofollow_rel = '' + value = ox.stripTags(value) + return value - links = re.compile('(\[(http.*?) (.*?)\])').findall(text) - for t, link, txt in links: - link = link.replace('http', '__LINK__').replace('.', '__DOT__') - ll = '%s' % (link, nofollow_rel, txt) - text = text.replace(t, ll) - links = re.compile('(\[(http.*?)\])').findall(text) - for t, link in links: - link = link.replace('http', '__LINK__').replace('.', '__DOT__') - ll = '%s' % (link, nofollow_rel, link) - text = text.replace(t, ll) +def sanitize_fragment(html): + return html5lib.parseFragment(html).toxml() - text = ox.urlize(text, nofollow=nofollow) - - #inpage links - text = re.sub('\[(/.+?) (.+?)\]', '\\2', text) - - text = text.replace('__LINK__', 'http').replace('__DOT__', '.') - text = text.replace("\n", '
') - return text diff --git a/pandora/item/utils.py b/pandora/item/utils.py index f72a1bbf..c7c851b4 100644 --- a/pandora/item/utils.py +++ b/pandora/item/utils.py @@ -44,7 +44,7 @@ def sort_string(string): #pad numbered titles string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string) - return unicodedata.normalize('NFKD', string) + return unicodedata.normalize('NFKD', string).lower() def sort_title(title): diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc index afccd3a3..aa33c76f 100644 --- a/pandora/padma.jsonc +++ b/pandora/padma.jsonc @@ -415,7 +415,7 @@ "id": "keywords", "title": "Keywords", "overlap": true, - "type": "text" + "type": "string" }, { "id": "descriptions", diff --git a/pandora/user/models.py b/pandora/user/models.py index 99a5d4bb..11dcac9f 100644 --- a/pandora/user/models.py +++ b/pandora/user/models.py @@ -187,6 +187,7 @@ def user_post_save(sender, instance, **kwargs): profile, new = UserProfile.objects.get_or_create(user=instance) if new and instance.is_superuser: profile.level = len(settings.CONFIG['userLevels']) - 1 + profile.newsletter = settings.CONFIG['user']['newsletter'] profile.save() SessionData.objects.filter(user=instance).update(level=profile.level, username=instance.username) diff --git a/requirements.txt b/requirements.txt index b37bc72b..66b3ab6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ django-celery>2.1.1 -e git://github.com/bit/django-extensions.git#egg=django_extensions -e git+git://github.com/dcramer/django-devserver#egg=django_devserver gunicorn +html5lib From 926d9a352ee12610ba3d741e2d82423d78e37d8d Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 20:04:36 +0530 Subject: [PATCH 2/8] typo --- pandora/annotation/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 9c5088ad..6383c3d7 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -61,7 +61,7 @@ class Annotation(models.Model): set_public_id = not self.id or not self.public_id layer = self.get_layer() if self.value: - self.value = utils.cleanup_value(self.value, self.layer['tyoe']) + self.value = utils.cleanup_value(self.value, self.layer['type']) self.findvalue = ox.stripTags(self.value).strip() sortvalue = self.findvalue sortvalue = sort_string(sortvalue) From e673e26c5373d6f931136e931ab8d69ffa22dfaa Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 15:43:04 +0100 Subject: [PATCH 3/8] fix unicode issues --- pandora/annotation/models.py | 5 ++--- pandora/annotation/utils.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandora/annotation/models.py b/pandora/annotation/models.py index 6383c3d7..55cb04f4 100644 --- a/pandora/annotation/models.py +++ b/pandora/annotation/models.py @@ -61,10 +61,9 @@ class Annotation(models.Model): set_public_id = not self.id or not self.public_id layer = self.get_layer() if self.value: - self.value = utils.cleanup_value(self.value, self.layer['type']) + self.value = utils.cleanup_value(self.value, layer['type']) self.findvalue = ox.stripTags(self.value).strip() - sortvalue = self.findvalue - sortvalue = sort_string(sortvalue) + sortvalue = sort_string(self.findvalue) if sortvalue: self.sortvalue = sortvalue[:1000] else: diff --git a/pandora/annotation/utils.py b/pandora/annotation/utils.py index 95328d7a..b7a3980f 100644 --- a/pandora/annotation/utils.py +++ b/pandora/annotation/utils.py @@ -14,5 +14,5 @@ def cleanup_value(value, layer_type): return value def sanitize_fragment(html): - return html5lib.parseFragment(html).toxml() + return html5lib.parseFragment(html).toxml().decode('utf-8') From 28a577d69f2eb4824cc9a51054ff2e09c5ae176a Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 21:05:39 +0530 Subject: [PATCH 4/8] use annotation find value --- pandora/annotation/managers.py | 1 + pandora/clip/managers.py | 4 ++-- pandora/clip/views.py | 2 +- pandora/item/utils.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandora/annotation/managers.py b/pandora/annotation/managers.py index bffd71ac..d156b803 100644 --- a/pandora/annotation/managers.py +++ b/pandora/annotation/managers.py @@ -26,6 +26,7 @@ def parseCondition(condition, user): 'in': 'start', 'out': 'end', 'id': 'public_id', + 'value': 'findvalue', }.get(k, k) if not k: k = 'name' diff --git a/pandora/clip/managers.py b/pandora/clip/managers.py index 4a748751..513036b0 100644 --- a/pandora/clip/managers.py +++ b/pandora/clip/managers.py @@ -27,7 +27,7 @@ def parseCondition(condition, user): 'in': 'start', 'out': 'end', 'place': 'annotations__places__id', - 'text': 'annotations__value', + 'text': 'annotations__findvalue', 'user': 'annotations__user__username', }.get(k, k) if not k: @@ -40,7 +40,7 @@ def parseCondition(condition, user): for l in filter(lambda l: not l.get('private', False), settings.CONFIG['layers'])] if k in public_layers: - return parseCondition({'key': 'annotations__value', + return parseCondition({'key': 'annotations__findvalue', 'value': v, 'operator': op}, user) \ & parseCondition({'key': 'annotations__layer', diff --git a/pandora/clip/views.py b/pandora/clip/views.py index 39f27d69..f6c2f04c 100644 --- a/pandora/clip/views.py +++ b/pandora/clip/views.py @@ -48,7 +48,7 @@ def order_query(qs, sort): if key.startswith('clip:'): key = e['key'][len('clip:'):] key = { - 'text': 'annotations__value', + 'text': 'annotations__sortvalue', 'position': 'start', }.get(key, key) elif key not in clip_keys: diff --git a/pandora/item/utils.py b/pandora/item/utils.py index c7c851b4..f72a1bbf 100644 --- a/pandora/item/utils.py +++ b/pandora/item/utils.py @@ -44,7 +44,7 @@ def sort_string(string): #pad numbered titles string = re.sub('(\d+)', lambda x: '%010d' % int(x.group(0)), string) - return unicodedata.normalize('NFKD', string).lower() + return unicodedata.normalize('NFKD', string) def sort_title(title): From d070cb616b3954b30c2d23f7f1c97795bef55752 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Tue, 27 Dec 2011 00:06:58 +0530 Subject: [PATCH 5/8] pad ids to len 7 for storage --- pandora/clip/views.py | 4 ++-- pandora/item/models.py | 1 + pandora/itemlist/models.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandora/clip/views.py b/pandora/clip/views.py index f6c2f04c..53e768a5 100644 --- a/pandora/clip/views.py +++ b/pandora/clip/views.py @@ -111,7 +111,7 @@ def findClips(request): Annotation.objects.filter(layer=layer, clip__in=ids)) elif 'position' in query: qs = order_query(qs, query['sort']) - ids = [i.public_id for i in qs] + ids = [i['public_id'] for i in qs.values('public_id')] data['conditions'] = data['conditions'] + { 'value': data['position'], 'key': query['sort'][0]['key'], @@ -123,7 +123,7 @@ def findClips(request): response['data']['position'] = utils.get_positions(ids, [qs[0].itemId])[0] elif 'positions' in data: qs = order_query(qs, query['sort']) - ids = [i.public_id for i in qs] + ids = [i['public_id'] for i in qs.values('public_id')] response['data']['positions'] = utils.get_positions(ids, data['positions']) else: response['data']['items'] = qs.count() diff --git a/pandora/item/models.py b/pandora/item/models.py index 22f6d25f..f70fbd2d 100644 --- a/pandora/item/models.py +++ b/pandora/item/models.py @@ -772,6 +772,7 @@ class Item(models.Model): def path(self, name=''): h = self.itemId + h = (7-len(h))*'0' + h return os.path.join('items', h[:2], h[2:4], h[4:6], h[6:], name) ''' diff --git a/pandora/itemlist/models.py b/pandora/itemlist/models.py index 4764a45c..837efce0 100644 --- a/pandora/itemlist/models.py +++ b/pandora/itemlist/models.py @@ -122,7 +122,7 @@ class List(models.Model): return response def path(self, name=''): - h = "%06d" % self.id + h = "%07d" % self.id return os.path.join('lists', h[:2], h[2:4], h[4:6], h[6:], name) def update_icon(self): From 3cd321a1169a73e3d21e2959098604ccd70eb27f Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 20:13:41 +0100 Subject: [PATCH 6/8] add filter of layer values(i.e. keywords) --- pandora/item/models.py | 19 ++++++++++++++++++- pandora/padma.jsonc | 37 ++++--------------------------------- 2 files changed, 22 insertions(+), 34 deletions(-) diff --git a/pandora/item/models.py b/pandora/item/models.py index f70fbd2d..a9ce4174 100644 --- a/pandora/item/models.py +++ b/pandora/item/models.py @@ -541,7 +541,7 @@ class Item(models.Model): '\n'.join([f.path for f in self.files.all()])) elif key['type'] == 'layer': qs = Annotation.objects.filter(layer=i, item=self).order_by('start') - save(i, '\n'.join([l.value for l in qs])) + save(i, '\n'.join([l.findvalue for l in qs])) elif i != '*' and i not in self.facet_keys: value = self.get(i) if isinstance(value, list): @@ -733,6 +733,22 @@ class Item(models.Model): #update cached values in clips self.clips.all().update(director=s.director, title=s.title) + def update_layer_facets(self): + filters = [f['id'] for f in settings.CONFIG['filters']] + for layer in settings.CONFIG['layers']: + if layer['id'] in filters: + key = layer['id'] + current_values = [a['value'] + for a in self.annotations.filter(layer=key).distinct().values('value')] + saved_values = [i.value for i in Facet.objects.filter(item=self, key=key)] + removed_values = filter(lambda i: i not in current_values, saved_values) + if removed_values: + Facet.objects.filter(item=self, key=key, value__in=removed_values).delete() + for value in current_values: + if value not in saved_values: + sortvalue = value + Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue) + def update_facets(self): for key in self.facet_keys + ['title']: current_values = self.get(key, []) @@ -769,6 +785,7 @@ class Item(models.Model): if key in self.person_keys + ['name']: sortvalue = get_name_sort(value) Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue) + self.update_layer_facets() def path(self, name=''): h = self.itemId diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc index aa33c76f..346b61d7 100644 --- a/pandora/padma.jsonc +++ b/pandora/padma.jsonc @@ -49,12 +49,7 @@ {"id": "year", "title": "Year", "type": "integer"}, {"id": "language", "title": "Language", "type": "string"}, {"id": "category", "title": "Category", "type": "string"}, - {"id": "writer", "title": "Writer", "type": "string"}, - {"id": "producer", "title": "Producer", "type": "string"}, - {"id": "cinematographer", "title": "Cinematographer", "type": "string"}, - {"id": "editor", "title": "Editor", "type": "string"}, - {"id": "actor", "title": "Actor", "type": "string"}, - {"id": "keyword", "title": "Keyword", "type": "string"} + {"id": "keywords", "title": "Keyword", "type": "string"} ], /* An itemKey must have the following properties: @@ -174,30 +169,6 @@ "filter": true, "find": true }, - { - "id": "genre", - "title": "Genre", - "type": ["string"], - "autocomplete": true, - "columnWidth": 120, - "filter": true, - "find": true - }, - { - "id": "keyword", - "title": "Keyword", - "type": ["string"], - "autocomplete": true, - "filter": true, - "find": true - }, - { - "id": "numberofkeywords", - "title": "Number of Keywords", - "type": "integer", - "columnWidth": 60, - "value": {"key": "keyword", "type": "length"} - }, { "id": "description", "title": "Description", @@ -503,10 +474,10 @@ } }, "filters": [ - {"id": "collection", "sort": [{"key": "name", "operator": "-"}]}, - {"id": "source", "sort": [{"key": "items", "operator": "-"}]}, + {"id": "collection", "sort": [{"key": "name", "operator": "+"}]}, + {"id": "source", "sort": [{"key": "name", "operator": "+"}]}, {"id": "category", "sort": [{"key": "items", "operator": "-"}]}, - {"id": "keyword", "sort": [{"key": "items", "operator": "-"}]}, + {"id": "keywords", "sort": [{"key": "items", "operator": "-"}]}, {"id": "location", "sort": [{"key": "items", "operator": "-"}]} ], "filtersSize": 176, From ae556c62f0a31d092b30858811c8c654e1bfaf0c Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 20:21:20 +0100 Subject: [PATCH 7/8] only add if not exists already --- pandora/item/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandora/item/models.py b/pandora/item/models.py index a9ce4174..47bf5d39 100644 --- a/pandora/item/models.py +++ b/pandora/item/models.py @@ -747,7 +747,7 @@ class Item(models.Model): for value in current_values: if value not in saved_values: sortvalue = value - Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue) + Facet.objects.get_or_create(item=self, key=key, value=value, sortvalue=sortvalue) def update_facets(self): for key in self.facet_keys + ['title']: From e3b948f08f351ee8d49ecef69b51e054fbf4be43 Mon Sep 17 00:00:00 2001 From: j <0x006A@0x2620.org> Date: Mon, 26 Dec 2011 21:30:21 +0100 Subject: [PATCH 8/8] use facets if possible for layer values --- pandora/item/managers.py | 8 ++++++-- pandora/item/models.py | 2 +- pandora/padma.jsonc | 4 ++-- static/js/pandora/placesDialog.js | 5 ++++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandora/item/managers.py b/pandora/item/managers.py index 40b18ba0..56d4088c 100644 --- a/pandora/item/managers.py +++ b/pandora/item/managers.py @@ -39,6 +39,10 @@ def parseCondition(condition, user): else: exclude = False + facet_keys = models.Item.facet_keys + ['title'] + for f in settings.CONFIG['filters']: + if f['id'] not in facet_keys: + facet_keys.append(f['id']) key_type = settings.CONFIG['keys'].get(k, {'type':'string'}).get('type') if isinstance(key_type, list): key_type = key_type[0] @@ -48,8 +52,8 @@ def parseCondition(condition, user): 'text': 'string', 'year': 'string', 'length': 'string', - 'list': 'list', 'layer': 'string', + 'list': 'list', }.get(key_type, key_type) if k == 'list': key_type = '' @@ -95,7 +99,7 @@ def parseCondition(condition, user): value_key = 'find__value' else: value_key = k - if k in models.Item.facet_keys + ['title']: + if k in facet_keys: in_find = False facet_value = 'facets__value%s' % { '==': '__iexact', diff --git a/pandora/item/models.py b/pandora/item/models.py index 47bf5d39..8cc97910 100644 --- a/pandora/item/models.py +++ b/pandora/item/models.py @@ -541,7 +541,7 @@ class Item(models.Model): '\n'.join([f.path for f in self.files.all()])) elif key['type'] == 'layer': qs = Annotation.objects.filter(layer=i, item=self).order_by('start') - save(i, '\n'.join([l.findvalue for l in qs])) + save(i, u'\n'.join([l.findvalue for l in qs])) elif i != '*' and i not in self.facet_keys: value = self.get(i) if isinstance(value, list): diff --git a/pandora/padma.jsonc b/pandora/padma.jsonc index 346b61d7..5f353ef1 100644 --- a/pandora/padma.jsonc +++ b/pandora/padma.jsonc @@ -45,7 +45,7 @@ {"id": "collection", "title": "Collection", "type": "string"}, {"id": "source", "title": "Source", "type": "string"}, {"id": "director", "title": "Director", "type": "string"}, - {"id": "location", "title": "Location", "type": "string"}, + {"id": "locations", "title": "Location", "type": "string"}, {"id": "year", "title": "Year", "type": "integer"}, {"id": "language", "title": "Language", "type": "string"}, {"id": "category", "title": "Category", "type": "string"}, @@ -478,7 +478,7 @@ {"id": "source", "sort": [{"key": "name", "operator": "+"}]}, {"id": "category", "sort": [{"key": "items", "operator": "-"}]}, {"id": "keywords", "sort": [{"key": "items", "operator": "-"}]}, - {"id": "location", "sort": [{"key": "items", "operator": "-"}]} + {"id": "locations", "sort": [{"key": "items", "operator": "-"}]} ], "filtersSize": 176, "find": {"conditions": [], "operator": "&"}, diff --git a/static/js/pandora/placesDialog.js b/static/js/pandora/placesDialog.js index fa40663b..c7b6d2e7 100644 --- a/static/js/pandora/placesDialog.js +++ b/static/js/pandora/placesDialog.js @@ -30,7 +30,10 @@ pandora.ui.placesDialog = function() { pandora.api.findClips({ query: { conditions: names.map(function(name) { - return {key: 'subtitles', value: name, operator: '='}; + //FIXME: this should be more generic + return Ox.getObjectById(pandora.site.layers, 'subtitles') + ? {key: 'subtitles', value: name, operator: '='} + : {key: 'locations', value: name, operator: '=='}; }), operator: names.length == 1 ? '&' : '|' }