From 52643a9218f60a0ac5344b04aa4dc2291d237f69 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Wed, 11 Jan 2012 13:12:32 +0530
Subject: [PATCH] create sitemap

---
Reviewer note: line breaks were restored from a whitespace-collapsed copy.
Blank context lines were placed according to the hunk line counts; the ones
in the first pandora/item/tasks.py hunk are a best guess, verify against the
tree before applying.

 pandora/app/views.py  | 13 ++++---
 pandora/item/tasks.py | 91 ++++++++++++++++++++++++++++++++++++++++++-
 pandora/item/views.py | 22 +++++++++++
 pandora/urls.py       |  2 ++
 4 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/pandora/app/views.py b/pandora/app/views.py
index 564e46b2f..2a428f9a5 100644
--- a/pandora/app/views.py
+++ b/pandora/app/views.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
-import re
-
 import copy
 
 from django.shortcuts import render_to_response, redirect
@@ -25,7 +23,6 @@ def intro(request):
     context = RequestContext(request, {'settings': settings})
     return render_to_response('intro.html', context)
 
-
 def index(request):
     context = RequestContext(request, {
         'base_url': request.build_absolute_uri('/'),
@@ -33,7 +30,6 @@ def index(request):
     })
     return render_to_response('index.html', context)
 
-
 def embed(request, id):
     context = RequestContext(request, {
         'settings': settings
@@ -76,6 +72,15 @@ def opensearch_xml(request):
         'application/xml'
     )
 
+def robots_txt(request, url=None):
+    '''
+        serve robots.txt: allow all crawlers and point them to the sitemap.
+        url is optional, the robots.txt pattern in urls.py captures nothing.
+    '''
+    return HttpResponse(
+        'User-agent: *\nDisallow:\nSitemap: %s\n' % request.build_absolute_uri('/sitemap.xml.gz'),
+        'text/plain'
+    )
 
 def getPage(request):
     '''
diff --git a/pandora/item/tasks.py b/pandora/item/tasks.py
index 491be06a2..b7114b08e 100644
--- a/pandora/item/tasks.py
+++ b/pandora/item/tasks.py
@@ -1,7 +1,12 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 
-from datetime import timedelta
+import os
+from datetime import timedelta, datetime
+import gzip
+
+from django.conf import settings
+from ox.utils import ET
 
 from celery.task import task, periodic_task
 import models
@@ -34,3 +39,87 @@ def load_subtitles(itemId):
     item = models.Item.objects.get(itemId=itemId)
     item.load_subtitles()
 
+@task(ignore_result=True, queue='default')
+def update_sitemap(base_url):
+    '''
+        write sitemap.xml.gz to MEDIA_ROOT, it lists the front page,
+        the site pages and all items with a level guests can see.
+        base_url is prepended as-is to every path in the sitemap.
+    '''
+    sitemap = os.path.abspath(os.path.join(settings.MEDIA_ROOT, 'sitemap.xml.gz'))
+
+    def absolute_url(url):
+        return base_url + url
+
+    urlset = ET.Element('urlset')
+    urlset.attrib['xmlns'] = "http://www.sitemaps.org/schemas/sitemap/0.9"
+    urlset.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance"
+    urlset.attrib['xsi:schemaLocation'] = "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+    urlset.attrib['xmlns:video']= "http://www.google.com/schemas/sitemap-video/1.0"
+
+    url = ET.SubElement(urlset, "url")
+    loc = ET.SubElement(url, "loc")
+    loc.text = absolute_url('')
+    # always, hourly, daily, weekly, monthly, yearly, never
+    changefreq = ET.SubElement(url, "changefreq")
+    changefreq.text = 'daily'
+    # This date should be in W3C Datetime format, can be %Y-%m-%d
+    lastmod = ET.SubElement(url, "lastmod")
+    lastmod.text = datetime.now().strftime("%Y-%m-%d")
+    # priority of page on site values 0.1 - 1.0
+    priority = ET.SubElement(url, "priority")
+    priority.text = '1.0'
+
+    for page in [s['id'] for s in settings.CONFIG['sitePages']]:
+        url = ET.SubElement(urlset, "url")
+        loc = ET.SubElement(url, "loc")
+        loc.text = absolute_url(page)
+        # always, hourly, daily, weekly, monthly, yearly, never
+        changefreq = ET.SubElement(url, "changefreq")
+        changefreq.text = 'monthly'
+        # priority of page on site values 0.1 - 1.0
+        priority = ET.SubElement(url, "priority")
+        priority.text = '1.0'
+
+    # the thumbnail type is the same for all items, only look it up once
+    icon = 'icon' if settings.CONFIG['user']['ui']['icons'] == 'frames' else 'poster'
+    allowed_level = settings.CONFIG['capabilities']['canSeeItem']['guest']
+    for i in models.Item.objects.filter(level__lte=allowed_level):
+        url = ET.SubElement(urlset, "url")
+        # URL of the page. This URL must begin with the protocol (such as http)
+        loc = ET.SubElement(url, "loc")
+        loc.text = absolute_url("%s" % i.itemId)
+        # This date should be in W3C Datetime format, can be %Y-%m-%d
+        lastmod = ET.SubElement(url, "lastmod")
+        lastmod.text = i.modified.strftime("%Y-%m-%d")
+        # always, hourly, daily, weekly, monthly, yearly, never
+        changefreq = ET.SubElement(url, "changefreq")
+        changefreq.text = 'monthly'
+        # priority of page on site values 0.1 - 1.0
+        priority = ET.SubElement(url, "priority")
+        priority.text = '1.0'
+        video = ET.SubElement(url, "video:video")
+        #el = ET.SubElement(video, "video:content_loc")
+        #el.text = absolute_url("%s/video" % i.itemId)
+        el = ET.SubElement(video, "video:player_loc")
+        el.attrib['allow_embed'] = 'no'
+        el.text = absolute_url("%s/video" % i.itemId)
+        el = ET.SubElement(video, "video:title")
+        el.text = i.get('title')
+        el = ET.SubElement(video, "video:thumbnail_loc")
+        el.text = absolute_url("%s/%s128.jpg" %(i.itemId, icon))
+        description = i.get('description', i.get('summary', ''))
+        if description:
+            el = ET.SubElement(video, "video:description")
+            el.text = description
+        el = ET.SubElement(video, "video:family_friendly")
+        el.text = 'Yes'
+        duration = i.get('duration')
+        if duration:
+            el = ET.SubElement(video, "video:duration")
+            el.text = "%s" % duration
+
+    # prepend the XML declaration to the serialized urlset
+    with gzip.open(sitemap, 'wb') as f:
+        f.write('<?xml version="1.0" encoding="UTF-8"?>\n' + ET.tostring(urlset))
+
diff --git a/pandora/item/views.py b/pandora/item/views.py
index 5c3c52b52..f5fb6c165 100644
--- a/pandora/item/views.py
+++ b/pandora/item/views.py
@@ -6,6 +6,7 @@ from datetime import datetime, timedelta
 import mimetypes
 import random
 from urlparse import urlparse
+import time
 
 import Image
 from django.db.models import Count, Sum, Max
@@ -854,6 +855,27 @@ def oembed(request):
     )
     return HttpResponse(json.dumps(oembed, indent=2), 'application/json')
 
+def sitemap_xml_gz(request):
+    '''
+        serve the gzipped sitemap from MEDIA_ROOT.
+        it is created on the first request and updated in the
+        background once the file is older than a day.
+    '''
+    sitemap = os.path.abspath(os.path.join(settings.MEDIA_ROOT, 'sitemap.xml.gz'))
+    base_url = request.build_absolute_uri('/')
+    if not os.path.exists(sitemap):
+        # no sitemap yet, create it now so there is a file to serve
+        tasks.update_sitemap(base_url)
+    else:
+        # only stat the file once we know it exists
+        age = time.mktime(time.localtime()) - os.stat(sitemap).st_ctime
+        if age > 24*60*60:
+            tasks.update_sitemap.delay(base_url)
+    response = HttpFileResponse(sitemap)
+    response['Content-Type'] = 'application/xml'
+    response['Content-Encoding'] = 'x-gzip'
+    return response
+
 def item(request, id):
     id = id.split('/')[0]
     template = 'index.html'
diff --git a/pandora/urls.py b/pandora/urls.py
index 0e3f849ba..5a1814383 100644
--- a/pandora/urls.py
+++ b/pandora/urls.py
@@ -30,6 +30,8 @@ urlpatterns = patterns('',
     (r'^favicon.ico$', serve_static_file, {'location': os.path.join(settings.STATIC_ROOT, 'png/icon.16.png'), 'content_type': 'image/x-icon'}),
     (r'^opensearch.xml$', 'app.views.opensearch_xml'),
     (r'^oembed$', 'item.views.oembed'),
+    (r'^robots.txt$', 'app.views.robots_txt'),
+    (r'^sitemap.xml.gz$', 'item.views.sitemap_xml_gz'),
     (r'', include('item.urls')),
 )
 #if settings.DEBUG: