commit c8f1a421421a7c45021554c1d3d4ef60639b06c1
Author: j <0x006A@0x2620.org>
Date:   Mon Jul 1 12:06:47 2013 +0200

    cinematools

diff --git a/README b/README
new file mode 100644
index 0000000..9efe036
--- /dev/null
+++ b/README
@@ -0,0 +1 @@
+collection tools to create *cine.ma sites
diff --git a/films_by_country.py b/films_by_country.py
new file mode 100755
index 0000000..7affa5b
--- /dev/null
+++ b/films_by_country.py
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+import ox.web.imdb
+import re
+import json
+import sys
+from optparse import OptionParser
+
+'''
+Collect (imdb id, title) pairs for all titles of a country and dump them as json.
+
+python films_by_country.py in idsofindia.json
+python films_by_country.py tr idsofturkey.json
+'''
+
+def reset_url(url):
+    '''Re-read url with timeout=0 (presumably bypasses the ox cache - confirm).'''
+    ox.web.imdb.read_url(url, timeout=0)
+
+if __name__ == '__main__':
+    usage = "usage: %prog [options] country output.json"
+    parser = OptionParser(usage=usage)
+    parser.add_option('-r', '--reset', dest='reset', default=None, help="reset given url")
+    (opts, args) = parser.parse_args()
+    if len(args) != 2:
+        parser.print_help()
+        sys.exit(1)
+
+    films = []
+    country, output = args
+
+    base_url = 'http://akas.imdb.com'
+    url = '%s/search/title?countries=%s&sort=year' % (base_url, country)
+    data = ox.web.imdb.read_url(url)
+    # NOTE(review): the two page-scraping patterns below look like they lost
+    # their HTML markup when this patch was extracted; restore them from the
+    # original commit before applying.
+    n = True
+    while n:
+        # n becomes the absolute url of the next result page, or an empty list
+        n = re.compile('Next »').findall(data)
+        if n:
+            n = '%s%s' % (base_url, n[0].split('href="')[-1])
+
+        results = re.compile('(.*?)\n', re.DOTALL).findall(data)
+        if results:
+            films += re.compile('href="/title/tt(\d{7})/" title="(.*?)"').findall(results[0])
+        print n
+        print len(films), 'films'
+        if n:
+            data = ox.web.imdb.read_url(n)
+        else:
+            # no next page: write the final page to last.html
+            with open('last.html', 'w') as f:
+                f.write(data)
+        # checkpoint whenever the running total is a multiple of 1000
+        if len(films) % 1000 == 0:
+            with open(output, 'w') as f:
+                json.dump(films, f, indent=2)
+
+    with open(output, 'w') as f:
+        json.dump(films, f, indent=2)
diff --git a/import_json.py b/import_json.py
new file mode 100644
index 0000000..61a1703
--- /dev/null
+++ b/import_json.py
@@ -0,0 +1,46 @@
+import ox
+import json
+
+def load(data_json):
+    '''
+    Delete all existing files, instances, volumes and items, then create
+    one Item per entry of the json file at data_json.
+    '''
+
+    def reset_table(table_name):
+        # restart the table's id sequence at 1
+        cursor = connection.cursor()
+        sql = "select setval('%s_id_seq', 1, false)" % table_name
+        cursor.execute(sql)
+
+    from django.db import connection, transaction
+    import item.models as models
+    import archive.models
+    import os
+    archive.models.File.objects.all().delete()
+    archive.models.Instance.objects.all().delete()
+    archive.models.Volume.objects.all().delete()
+    models.Item.objects.all().delete()
+    reset_table(archive.models.File._meta.db_table)
+    reset_table(archive.models.Instance._meta.db_table)
+    reset_table(archive.models.Volume._meta.db_table)
+    reset_table(models.Item._meta.db_table)
+    transaction.commit_unless_managed()
+    os.system('rm -r /srv/pandora/data/files')
+    os.system('rm -r /srv/pandora/data/items')
+
+    with open(data_json) as f:
+        films = json.load(f)
+    for data in films:
+        item = models.Item()
+        item.data = data
+        item.save()
+        item.make_poster(True)
+        item.make_icon()
+        item.level = 2
+        item.save()
+        print item
+
+if __name__ == '__main__':
+    print 'please import from ./manage.py annd run import_json.load(path_to_json)'
+
diff --git a/stats.py b/stats.py
new file mode 100644
index 0000000..794643e
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,28 @@
+import json
+import sys
+
+# print how many entries of an ids json file are TV productions vs. movies
+if len(sys.argv) != 2:
+    print "usage: %s idsofcountry.json" % sys.argv[0]
+    sys.exit(1)
+
+idsofcountry = sys.argv[1]
+with open(idsofcountry) as f:
+    data = json.load(f)
+
+mini_series = filter(lambda x: 'Mini-Series' in x[1], data)
+tv_series = filter(lambda x: 'TV Series' in x[1], data)
+tv_movies = filter(lambda x: 'TV Movie' in x[1], data)
+tv_special = filter(lambda x: 'TV Special' in x[1], data)
+#cinema = set(data) - set(mini_series) - set(tv_series) - set(tv_movies)
+
+
+print len(tv_special), 'TV Specials'
+print len(tv_series), 'TV Series'
+print len(tv_movies), 'TV Movies'
+print len(mini_series), 'Mini-Series'
+#print len(cinema), 'Cinema'
+print len(data) - len(mini_series) - len(tv_movies) - len(tv_series) - len(tv_special), 'Movies'
+print len(data), 'total'
+
+