add media backend
This commit is contained in:
parent
696717d138
commit
f257da3567
13 changed files with 133 additions and 2 deletions
40
app/event/management/commands/extract_urls.py
Normal file
40
app/event/management/commands/extract_urls.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
import csv
|
||||
import re
|
||||
import sys
|
||||
import ox
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
|
||||
from ... import models
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that prints a CSV of URLs found in event bodies.

    Every ``href="..."`` and ``src="..."`` attribute value in each
    ``Event.body`` is collected and de-duplicated. Site-relative links
    (starting with ``/``), fragment links (starting with ``#``) and empty
    values are skipped. The remaining URLs are written to standard output
    as CSV with the columns ``url``, ``pandora`` and ``archive``:

    * ``url``     -- the HTML-entity-decoded URL
    * ``pandora`` -- ``'y'`` when the URL contains ``youtube`` or ``vimeo``,
      otherwise empty
    * ``archive`` -- a web.archive.org wildcard lookup URL for ``url``
    """

    help = 'extract urls'
    args = ''

    # Compiled once at class creation instead of once per event. The two
    # patterns are kept separate (not merged into one alternation) so that
    # each scans the whole body independently, exactly as before.
    _url_patterns = (
        re.compile(r'href="(.*?)"'),
        re.compile(r'src="(.*?)"'),
    )

    def add_arguments(self, parser):
        """Register the ``--debug`` flag (accepted but not used by handle)."""
        parser.add_argument('--debug', action='store_true', dest='debug',
                            default=False, help='debug something')

    def handle(self, **options):
        """Collect URLs from all events and write the CSV to stdout."""
        urls = set()
        for event in models.Event.objects.all():
            # Guard against a missing body; findall() raises TypeError
            # when handed None.
            body = event.body or ''
            for pattern in self._url_patterns:
                urls.update(pattern.findall(body))

        writer = csv.writer(sys.stdout)
        writer.writerow(['url', 'pandora', 'archive'])
        for url in sorted(urls):
            url = ox.decode_html(url)
            # An empty attribute (href="") yields an empty string; the old
            # url[0] check crashed on it with IndexError. Skip it together
            # with site-relative and fragment-only links.
            if not url or url.startswith(('/', '#')):
                continue
            p = 'y' if 'youtube' in url or 'vimeo' in url else ''
            writer.writerow([url, p, 'https://web.archive.org/web/*/' + url])
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue