From c8f1a421421a7c45021554c1d3d4ef60639b06c1 Mon Sep 17 00:00:00 2001
From: j <0x006A@0x2620.org>
Date: Mon, 1 Jul 2013 12:06:47 +0200
Subject: [PATCH] cinematools
---
README | 1 +
films_by_country.py | 52 +++++++++++++++++++++++++++++++++++++++++++++
import_json.py | 40 ++++++++++++++++++++++++++++++++++
stats.py | 26 +++++++++++++++++++++++
4 files changed, 119 insertions(+)
create mode 100644 README
create mode 100755 films_by_country.py
create mode 100644 import_json.py
create mode 100644 stats.py
diff --git a/README b/README
new file mode 100644
index 0000000..9efe036
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+collection tools to create *cine.ma sites
diff --git a/films_by_country.py b/films_by_country.py
new file mode 100755
index 0000000..7affa5b
--- /dev/null
+++ b/films_by_country.py
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+import ox.web.imdb
+import re
+import json
+import sys
+from optparse import OptionParser
+
+'''
+python films_by_country.py in idsofindia.json
+python films_by_country.py tr idsofturkey.json
+'''
+
+def reset_url(url):
+    '''Re-read url with timeout=0 (presumably bypasses the ox cache -- confirm).'''
+    ox.web.imdb.read_url(url, timeout=0)
+if __name__ == '__main__':
+    parser = OptionParser(usage="usage: %prog [options] country output.json")
+    parser.add_option('-r', '--reset', dest='reset', default=None, help="reset given url")
+    (opts, args) = parser.parse_args()
+    if len(args) != 2:
+        parser.print_help()
+        sys.exit(1)
+    if opts.reset:
+        reset_url(opts.reset)
+
+    films = []
+    country, output = args
+    base_url = 'http://akas.imdb.com'
+    data = ox.web.imdb.read_url('%s/search/title?countries=%s&sort=year' % (base_url, country))
+    n = True
+    while n:
+        # NOTE(review): the markup inside the two page patterns below was stripped
+        # from the submitted patch and is reconstructed -- confirm on a live page.
+        n = re.compile('<a href="(.*?)">Next&nbsp;&raquo;</a>').findall(data)
+        if n:
+            # the lazy match may start at an earlier link; keep only the last href
+            n = '%s%s' % (base_url, n[0].split('href="')[-1])
+        results = re.compile('<table class="results">(.*?)</table>', re.DOTALL).findall(data)
+        if results:
+            films += re.compile(r'href="/title/tt(\d{7})/" title="(.*?)"').findall(results[0])
+        print(n)
+        print('%d films' % len(films))
+        if n:
+            data = ox.web.imdb.read_url(n)
+        else:
+            # no next link: keep the last page so it can be inspected
+            with open('last.html', 'w') as f:
+                f.write(data)
+        # write on the last page, and as a checkpoint at every multiple of 1000
+        if not n or len(films) % 1000 == 0:
+            with open(output, 'w') as f:
+                json.dump(films, f, indent=2)
diff --git a/import_json.py b/import_json.py
new file mode 100644
index 0000000..61a1703
--- /dev/null
+++ b/import_json.py
@@ -0,0 +1,40 @@
+import ox
+import json
+
+def load(data_json):
+    '''Replace all items with the entries of the JSON file at path data_json.
+
+    Destructive: existing items, archive rows and data directories are removed.
+    '''
+    from django.db import connection, transaction
+    import item.models as models
+    import archive.models
+    import os
+
+    # Delete the old rows, then restart every id sequence at 1.
+    wiped = (archive.models.File, archive.models.Instance,
+             archive.models.Volume, models.Item)
+    for model in wiped:
+        model.objects.all().delete()
+    for model in wiped:
+        connection.cursor().execute("select setval('%s_id_seq', 1, false)" % model._meta.db_table)
+    transaction.commit_unless_managed()
+    os.system('rm -r /srv/pandora/data/files')
+    os.system('rm -r /srv/pandora/data/items')
+
+    with open(data_json) as f:
+        films = json.load(f)
+    for data in films:
+        item = models.Item()
+        item.data = data
+        item.save()
+        item.make_poster(True)
+        item.make_icon()
+        # the level is set after poster/icon creation, hence the second save
+        item.level = 2
+        item.save()
+        print(item)
+
+if __name__ == '__main__':
+    print('please import from ./manage.py and run import_json.load(path_to_json)')
+
diff --git a/stats.py b/stats.py
new file mode 100644
index 0000000..794643e
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,26 @@
+import json
+import sys
+
+# Print per-kind counts for a JSON list of entries (e.g. [id, title] pairs).
+if len(sys.argv) != 2:
+    print("usage: %s idsofcountry.json" % sys.argv[0])
+    sys.exit(1)
+
+with open(sys.argv[1]) as f:
+    data = json.load(f)
+
+# The kind is detected from a marker inside each entry's second field.
+mini_series = [x for x in data if 'Mini-Series' in x[1]]
+tv_series = [x for x in data if 'TV Series' in x[1]]
+tv_movies = [x for x in data if 'TV Movie' in x[1]]
+tv_special = [x for x in data if 'TV Special' in x[1]]
+tv_total = len(mini_series) + len(tv_series) + len(tv_movies) + len(tv_special)
+
+print('%d TV Specials' % len(tv_special))
+print('%d TV Series' % len(tv_series))
+print('%d TV Movies' % len(tv_movies))
+print('%d Mini-Series' % len(mini_series))
+print('%d Movies' % (len(data) - tv_total))
+print('%d total' % len(data))
+
+