cinematools
commit c8f1a42142
4 changed files with 119 additions and 0 deletions

README (new file, 1 line)
@@ -0,0 +1 @@
collection of tools to create *cine.ma sites

films_by_country.py (new executable file, 52 lines)
@@ -0,0 +1,52 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import ox.web.imdb
import re
import json
import sys
from optparse import OptionParser

'''
python films_by_country.py in idsofindia.json
python films_by_country.py tr idsofturkey.json
'''

def reset_url(url):
    # re-read the url with timeout=0 so any cached copy is ignored
    ox.web.imdb.read_url(url, timeout=0)

if __name__ == '__main__':
    usage = "usage: %prog [options] country output.json"
    parser = OptionParser(usage=usage)
    parser.add_option('-r', '--reset', dest='reset', default=None, help="reset given url")
    (opts, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)

    films = []
    country, output = args

    # walk the IMDb search results for the given country, page by page
    base_url = 'http://akas.imdb.com'
    url = '%s/search/title?countries=%s&sort=year' % (base_url, country)
    data = ox.web.imdb.read_url(url)
    n = True
    while n:
        n = re.compile('<a href="(.*?)">Next »</a>').findall(data)
        if n:
            n = '%s%s' % (base_url, n[0].split('href="')[-1])

        results = re.compile('<table class="results">(.*?)</table>', re.DOTALL).findall(data)
        if results:
            films += re.compile(r'href="/title/tt(\d{7})/" title="(.*?)"').findall(results[0])
        print n
        print len(films), 'films'
        if n:
            data = ox.web.imdb.read_url(n)
        else:
            # no next link found, keep the last page around for debugging
            with open('last.html', 'w') as f:
                f.write(data)
        # write intermediate results every 1000 films
        if len(films) % 1000 == 0:
            with open(output, 'w') as f:
                json.dump(films, f, indent=2)

    with open(output, 'w') as f:
        json.dump(films, f, indent=2)
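
For orientation, a minimal sketch of reading the file this script writes (not part of the commit; the path is the example from the docstring, and each entry is the [imdb_id, title] pair captured by the regex above):

    # read back the JSON written by films_by_country.py; the path is an example
    import json
    films = json.load(open('idsofturkey.json'))
    for imdb_id, title in films[:5]:
        print imdb_id, title
    print len(films), 'films collected'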

import_json.py (new file, 40 lines)
@@ -0,0 +1,40 @@
import ox
import json


def load(data_json):

    def reset_table(table_name):
        # rewind the table's id sequence so newly imported rows start at id 1
        cursor = connection.cursor()
        sql = "select setval('%s_id_seq', 1, false)" % table_name
        cursor.execute(sql)

    from django.db import connection, transaction
    import item.models as models
    import archive.models
    import os

    # wipe existing files, instances, volumes and items before importing
    archive.models.File.objects.all().delete()
    archive.models.Instance.objects.all().delete()
    archive.models.Volume.objects.all().delete()
    models.Item.objects.all().delete()
    reset_table(archive.models.File._meta.db_table)
    reset_table(archive.models.Instance._meta.db_table)
    reset_table(archive.models.Volume._meta.db_table)
    reset_table(models.Item._meta.db_table)
    transaction.commit_unless_managed()
    os.system('rm -r /srv/pandora/data/files')
    os.system('rm -r /srv/pandora/data/items')

    # create one Item per entry in the JSON file
    films = json.load(open(data_json))
    for data in films:
        item = models.Item()
        item.data = data
        item.save()
        item.make_poster(True)
        item.make_icon()
        item.level = 2
        item.save()
        print item


if __name__ == '__main__':
    print 'please import from ./manage.py and run import_json.load(path_to_json)'
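
Going by the message printed above, this script is meant to be run from a Django shell inside the pandora tree; a sketch of that invocation, with the JSON path as an example only:

    $ ./manage.py shell
    >>> import import_json
    >>> import_json.load('idsofturkey.json')   # file written by films_by_country.py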

stats.py (new file, 26 lines)
@@ -0,0 +1,26 @@
import json
import sys

if len(sys.argv) != 2:
    print "usage: %s idsofcountry.json" % sys.argv[0]
    sys.exit(1)

idsofcountry = sys.argv[1]
data = json.load(open(idsofcountry))

# each entry is an [imdb_id, title] pair; the title string carries markers
# like 'TV Series' or 'Mini-Series' that are used to classify it
mini_series = filter(lambda x: 'Mini-Series' in x[1], data)
tv_series = filter(lambda x: 'TV Series' in x[1], data)
tv_movies = filter(lambda x: 'TV Movie' in x[1], data)
tv_special = filter(lambda x: 'TV Special' in x[1], data)
#cinema = set(data) - set(mini_series) - set(tv_series) - set(tv_movies)

print len(tv_special), 'TV Specials'
print len(tv_series), 'TV Series'
print len(tv_movies), 'TV Movies'
print len(mini_series), 'Mini-Series'
#print len(cinema), 'Cinema'
print len(data) - len(mini_series) - len(tv_movies) - len(tv_series) - len(tv_special), 'Movies'
print len(data), 'total'
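
For illustration, a hypothetical run; the file name and all counts are made up, only the output format follows the print statements above:

    $ python stats.py idsofturkey.json
    3 TV Specials
    120 TV Series
    45 TV Movies
    7 Mini-Series
    4825 Movies
    5000 total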