cinematools

This commit is contained in:
j 2013-07-01 12:06:47 +02:00
commit c8f1a42142
4 changed files with 119 additions and 0 deletions

1
README Normal file
View file

@ -0,0 +1 @@
collection of tools to create *cine.ma sites

52
films_by_country.py Executable file
View file

@ -0,0 +1,52 @@
#!/usr/bin/python
import ox.web.imdb
import re
import json
import sys
from optparse import OptionParser
'''
Usage examples:
python films_by_country.py in idsofindia.json
python films_by_country.py tr idsofturkey.json
'''
def reset_url(url):
    """Re-read *url* through ox.web.imdb with ``timeout=0``.

    The response is discarded; the call is made only for its side effect.
    NOTE(review): presumably ``timeout=0`` makes ox bypass/refresh its cached
    copy of the page -- confirm against the ox cache implementation.
    """
    ox.web.imdb.read_url(url, timeout=0)
# Patterns are compiled once here instead of once per results page.
NEXT_LINK_RE = re.compile('<a href="(.*?)">Next&nbsp;&raquo;</a>')
RESULTS_TABLE_RE = re.compile('<table class="results">(.*?)</table>', re.DOTALL)
TITLE_RE = re.compile(r'href="/title/tt(\d{7})/" title="(.*?)"')


def parse_results_page(data, base_url):
    """Extract the next-page URL and the listed titles from one results page.

    data     -- HTML of a search results page
    base_url -- prefix prepended to the relative "Next" link

    Returns ``(next_url, films)``. ``next_url`` is None when the page has no
    "Next" link; ``films`` is a list of ``(imdb_id, title)`` tuples taken from
    the first ``<table class="results">`` on the page (empty if there is none).
    """
    next_url = None
    next_links = NEXT_LINK_RE.findall(data)
    if next_links:
        # The match can begin at an earlier '<a href="' than the "Next" link
        # itself, so keep only what follows the last 'href="' in the capture.
        next_url = '%s%s' % (base_url, next_links[0].split('href="')[-1])

    films = []
    results = RESULTS_TABLE_RE.findall(data)
    if results:
        films = TITLE_RE.findall(results[0])
    return next_url, films


if __name__ == '__main__':
    usage = "usage: %prog [options] country output.json"
    parser = OptionParser(usage=usage)
    # NOTE(review): --reset is accepted but nothing below reads opts.reset;
    # reset_url() is never called from this script.
    parser.add_option('-r', '--reset', dest='reset', default=None, help="reset given url")
    (opts, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    country, output = args
    base_url = 'http://akas.imdb.com'
    films = []
    url = '%s/search/title?countries=%s&sort=year' % (base_url, country)
    # Follow the "Next" links until a page without one is reached.
    while url:
        data = ox.web.imdb.read_url(url)
        url, found = parse_results_page(data, base_url)
        films += found
        print(url)
        print('%d films' % len(films))
        if not url:
            # Keep the final page on disk so the stop condition can be inspected.
            with open('last.html', 'w') as f:
                f.write(data)
        if len(films) % 1000 == 0:
            # Periodic checkpoint of what has been collected so far.
            with open(output, 'w') as f:
                json.dump(films, f, indent=2)

    with open(output, 'w') as f:
        json.dump(films, f, indent=2)

40
import_json.py Normal file
View file

@ -0,0 +1,40 @@
import ox
import json
def load(data_json):
    """Wipe the existing archive/items and re-create items from a JSON file.

    data_json -- path to a JSON file containing a list; each element is
                 assigned to ``Item.data`` of a newly created item.

    DESTRUCTIVE: deletes every File, Instance and Volume row of
    ``archive.models`` and every ``Item``, resets their id sequences, and
    removes /srv/pandora/data/files and /srv/pandora/data/items.
    """
    # Imported here rather than at module level; the message printed when this
    # file is run directly says it is meant to be used from ./manage.py.
    from django.db import connection, transaction
    import item.models as models
    import archive.models
    import os

    def reset_table(table_name):
        # Restart the table's id sequence so the next inserted row gets id 1.
        # Assumes a PostgreSQL-style sequence named "<table>_id_seq".
        cursor = connection.cursor()
        sql = "select setval('%s_id_seq', 1, false)" % table_name
        cursor.execute(sql)

    archive.models.File.objects.all().delete()
    archive.models.Instance.objects.all().delete()
    archive.models.Volume.objects.all().delete()
    models.Item.objects.all().delete()

    reset_table(archive.models.File._meta.db_table)
    reset_table(archive.models.Instance._meta.db_table)
    reset_table(archive.models.Volume._meta.db_table)
    reset_table(models.Item._meta.db_table)
    transaction.commit_unless_managed()

    os.system('rm -r /srv/pandora/data/files')
    os.system('rm -r /srv/pandora/data/items')

    # Close the input file deterministically instead of leaking the handle.
    with open(data_json) as f:
        films = json.load(f)

    for data in films:
        item = models.Item()
        item.data = data
        # Saved once before the poster/icon calls and again after level is set.
        item.save()
        item.make_poster(True)
        item.make_icon()
        item.level = 2  # NOTE(review): meaning of level 2 is defined elsewhere -- confirm
        item.save()
        print(item)
if __name__ == '__main__':
    # Running this file directly only prints a hint; load() has to be called
    # from within the Django project.
    print('please import from ./manage.py and run import_json.load(path_to_json)')

26
stats.py Normal file
View file

@ -0,0 +1,26 @@
import json
import sys
# (marker searched for in the title string, label used in the report),
# listed in the order the report prints them.
TV_CATEGORIES = (
    ('TV Special', 'TV Specials'),
    ('TV Series', 'TV Series'),
    ('TV Movie', 'TV Movies'),
    ('Mini-Series', 'Mini-Series'),
)


def count_categories(data):
    """Count entries per category.

    data -- sequence of entries whose second element (index 1) is the title
            string that may contain one of the category markers.

    Returns a list of ``(label, count)`` pairs: one per TV category, then
    'Movies' (entries matching none of the counted categories, computed as
    total minus the category counts) and finally 'total'.
    """
    counts = [
        (label, sum(1 for entry in data if marker in entry[1]))
        for marker, label in TV_CATEGORIES
    ]
    # Movies are derived by subtraction rather than by set difference: entries
    # loaded from JSON are lists and therefore not hashable.
    tv_total = sum(count for _, count in counts)
    counts.append(('Movies', len(data) - tv_total))
    counts.append(('total', len(data)))
    return counts


def main(argv):
    """Print the category statistics for the JSON file named in argv[1]."""
    if len(argv) != 2:
        print("usage: %s idsofcountry.json" % argv[0])
        sys.exit(1)
    idsofcountry = argv[1]
    with open(idsofcountry) as f:
        data = json.load(f)
    for label, count in count_categories(data):
        print('%d %s' % (count, label))


if __name__ == '__main__':
    main(sys.argv)