import csv
import re
import sys
import ox

from django.core.management.base import BaseCommand
from django.conf import settings

from ... import models


class Command(BaseCommand):
    """Management command that prints the external URLs found in event bodies.

    The ``body`` of every ``models.Event`` is scanned for double-quoted
    ``href="..."`` and ``src="..."`` attribute values. Each distinct URL is
    written to standard output as one CSV row.
    """

    help = 'extract urls'
    args = ''

    # Compiled once instead of once per event. Only double-quoted attribute
    # values are recognised, and ``.`` does not cross line breaks.
    _URL_PATTERNS = (
        re.compile(r'href="(.*?)"'),
        re.compile(r'src="(.*?)"'),
    )

    def add_arguments(self, parser):
        """Register the ``--debug`` flag; ``handle`` does not read it."""
        parser.add_argument('--debug', action='store_true', dest='debug',
                            default=False, help='debug something')

    def handle(self, **options):
        """Collect URLs from all events and write them as CSV to stdout.

        Output columns:
            url     -- the HTML-entity-decoded URL
            pandora -- ``'y'`` when the URL contains ``youtube`` or
                       ``vimeo``, otherwise empty
            archive -- ``https://web.archive.org/web/*/`` followed by the URL

        Empty URLs and URLs starting with ``/`` or ``#`` are skipped.
        """
        urls = set()
        for event in models.Event.objects.all():
            # Guard against a missing body; whether the field is nullable is
            # not visible from this file.
            body = event.body or ''
            for pattern in self._URL_PATTERNS:
                for raw_url in pattern.findall(body):
                    # Decode before inserting so that e.g. "?a=1&amp;b=2" and
                    # "?a=1&b=2" collapse into a single row and the output is
                    # sorted by the value that is actually printed.
                    urls.add(ox.decode_html(raw_url))

        writer = csv.writer(sys.stdout)
        writer.writerow(['url', 'pandora', 'archive'])
        for url in sorted(urls):
            # ``href=""`` yields an empty string; indexing url[0] would raise
            # IndexError, so test for emptiness and use startswith instead.
            if not url or url.startswith(('/', '#')):
                continue
            p = 'y' if ('youtube' in url or 'vimeo' in url) else ''
            writer.writerow([url, p, 'https://web.archive.org/web/*/' + url])