diff --git a/import_scripts/london_imc_import.py b/import_scripts/london_imc_import.py
new file mode 100644
index 0000000..50f646a
--- /dev/null
+++ b/import_scripts/london_imc_import.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python3
+from datetime import datetime
+import os
+import re
+
+import ox
+import lxml.html
+
def parse_info(f):
    """Parse a cached Indymedia London video HTML page into a metadata dict.

    f: path to a cached ``.html`` page under the mirrored site tree.

    Returns a dict with keys ``mp4``, ``oshash``, ``title``, ``links``,
    ``summary``, ``depositor``, ``collection``, ``date`` and optionally
    ``themes`` / ``location`` — or ``None`` when the page has no MP4 link
    or the referenced video file is missing on disk.
    """
    info = {}
    with open(f) as fd:
        html = fd.read()
    doc = lxml.html.document_fromstring(html)

    # The download link's visible text literally includes the double quotes:
    # the anchor reads "MP4" (quotes and all) on these pages.
    mp4 = [a for a in doc.xpath('.//a') if a.text_content() == '"MP4"']
    if not mp4:
        return None
    mp4 = mp4[0].attrib['href']
    # hrefs are site-absolute, so prepend the local mirror root.
    info['mp4'] = 'london.indymedia.org/shared' + mp4
    if os.path.exists(info['mp4']):
        info['oshash'] = ox.oshash(info['mp4'])
    else:
        print('video missing', info['mp4'])
        return None
    info['title'] = doc.xpath('.//h1[contains(@class, "title")]')[0].text_content().strip()
    info['links'] = ['http://london.indymedia.org/videos/%s' % f.split('/')[-1].split('.')[0]]
    # Take the last <p> of the post body as the summary; [3:-4] strips the
    # surrounding "<p>" and "</p>" tags from the serialized markup.
    info['summary'] = lxml.html.tostring(doc.xpath('.//div[contains(@class, "singlepost")]')[0].xpath('.//p')[-1]).decode().strip()[3:-4]

    byline = doc.xpath('.//p[contains(@class, "byline")]')[0]
    uploader = byline.xpath('.//strong')[0].text_content().strip()

    # Reconstructed: the literal was garbled across lines in the diff; it is
    # two newlines followed by the creator line.
    info['summary'] += '\n\nCreator: %s' % uploader
    info['depositor'] = uploader
    info['collection'] = 'Indymedia London'

    published = byline.xpath('.//small')[0].text_content().strip()
    published = published.replace('Published:', '').strip()
    # Site format e.g. "January 02, 2010 13:45" -> "2010-01-02 13:45".
    published = datetime.strptime(published, '%B %d, %Y %H:%M').strftime('%Y-%m-%d %H:%M')
    info['date'] = published

    group = [a for a in doc.xpath('.//a') if '/groups/' in a.attrib.get('href', '')]
    if group:
        group = group[0].text_content().strip()
        # Reconstructed garbled literal, same pattern as the Creator line.
        info['summary'] += '\n\nGroup: %s' % group

    tags = [a for a in doc.xpath('.//a') if '/about/' in a.attrib.get('href', '')]
    if tags:
        info['themes'] = [t.text_content().strip().replace('_', ' ') for t in tags]
        # These two navigation links appear on every page and are not real themes.
        info['themes'] = [t for t in info['themes'] if t not in ('repression', 'solidarity')]
        if not info['themes']:
            del info['themes']

    tags = [a for a in doc.xpath('.//a') if '/in/' in a.attrib.get('href', '')]
    if tags:
        info['location'] = ', '.join([t.text_content().strip().replace('_', ' ') for t in tags])

    return info
+
def parse_folder(base='london.indymedia.org/shared/system/cache/videos'):
    """Walk *base* and parse every cached .html video page.

    Returns the list of metadata dicts for pages that parsed successfully
    (pages without a video are skipped).
    """
    collected = []
    for dirpath, _dirnames, filenames in os.walk(base):
        for name in filenames:
            path = os.path.join(dirpath, name)
            if not path.endswith('.html'):
                continue
            parsed = parse_info(path)
            if parsed:
                collected.append(parsed)
    return collected
+
+
def import_video(api, info):
    """Upload one video described by *info* to the pan.do/ra instance behind *api*.

    Skips the upload when a media item with the same oshash already exists.
    Returns True on success (or when already present), False when the chunked
    upload fails.
    """
    existing = api.findMedia({
        'query': {
            'conditions': [{'key': 'oshash', 'value': info['oshash']}]
        }
    })['data']['items']
    if existing:
        # Already imported — nothing to do.
        return True

    filename = info['mp4']
    oshash = info['oshash']
    avinfo = ox.avinfo(filename)
    # Drop the local path before sending file metadata to the server.
    avinfo.pop('path', None)
    response = api.addMedia({
        'id': oshash,
        'filename': os.path.basename(filename),
        'info': avinfo
    })
    item_id = response['data']['item']
    url = '%supload/direct/' % api.url
    if not api.upload_chunks(url, filename, {'id': oshash}):
        return False
    # Upload succeeded: attach the remaining metadata to the new item.
    del info['oshash']
    del info['mp4']
    info['id'] = item_id
    api.edit(info)
    return True
+
+
if __name__ == '__main__':
    # Parse the local mirror first, then sign in and push each video,
    # reporting any that fail to upload.
    videos = parse_folder()
    api = ox.api.signin('https://amp.0x2620.org/api/')
    for video in videos:
        if not import_video(api, video):
            print('!! failed', video['links'])