2014-09-30 19:27:26 +00:00
|
|
|
from __future__ import print_function
|
2011-07-30 08:12:24 +00:00
|
|
|
import json
|
2011-07-30 06:42:44 +00:00
|
|
|
import re
|
|
|
|
|
2016-06-08 13:32:46 +00:00
|
|
|
from six import text_type
|
2012-08-14 13:58:05 +00:00
|
|
|
from ox.cache import read_url
|
2011-07-30 06:42:44 +00:00
|
|
|
|
|
|
|
# Request headers sent with the iTunes search requests; the User-Agent
# identifies the client as the iTunes desktop application.
HEADERS = {
    'User-Agent': 'iTunes/10.4 (Macintosh; Intel Mac OS X 10.7) AppleWebKit/534.48.3',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us, en;q=0.50',
    'X-Apple-Store-Front': '143441-1,12',
    'X-Apple-Tz': '7200',
    'Accept-Encoding': 'gzip, deflate',
}
|
2011-07-30 08:12:24 +00:00
|
|
|
# Browser-style (Safari 5.1) User-Agent used for the trailers site requests.
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
    'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
)
|
2011-07-30 06:42:44 +00:00
|
|
|
|
2012-08-15 15:15:40 +00:00
|
|
|
def get_movie_data(title, director):
    """Look up link, poster and trailer URLs for a movie on Apple's sites.

    Two sources are queried in order:

    1. The iTunes advanced search (preferred source for the link).
    2. The trailers.apple.com quick-find endpoint (preferred source for
       poster and trailer; its values overwrite those found via iTunes,
       while its link is only used when iTunes yielded none).

    Args:
        title: Movie title, as text or UTF-8 encoded bytes.
        director: Director name, as text or UTF-8 encoded bytes.

    Returns:
        A dict that may contain the keys ``'link'``, ``'poster'`` and
        ``'trailer'``. Keys are absent when the corresponding item was not
        found; the dict is empty when neither source matched.
    """
    # Function-scope import: six is already a dependency of this module.
    from six.moves.urllib.parse import quote

    if isinstance(title, text_type):
        title = title.encode('utf-8')
    if isinstance(director, text_type):
        director = director.encode('utf-8')
    # Percent-encode the UTF-8 bytes so the values are safe inside a query
    # string. quote() returns a native str on both Python 2 and 3, which
    # avoids the str + bytes TypeError on Python 3.
    title = quote(title, safe='')
    director = quote(director, safe='')

    data = {}

    # itunes section (preferred source for link)
    url = 'http://ax.search.itunes.apple.com/WebObjects/MZSearch.woa/wa/advancedSearch'
    url += '?media=movie&movieTerm=' + title
    url += '&actorNames=&directorProducerName=' + director
    url += '&releaseYearTerm=&descriptionTerm=&genreIndex=1&ratingIndex=1'
    # Use a per-call copy so the shared module-level HEADERS is not mutated.
    itunes_headers = dict(HEADERS, Referer=url)
    html = read_url(url, headers=itunes_headers, unicode=True)
    regexp = '<a href="(http://itunes.apple.com/us/movie/.*?)" class="artwork-link"><div class="artwork">'
    regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>'
    results = re.compile(regexp).findall(html)
    if results:
        # Only the first search hit is considered.
        data['link'] = results[0][0]
        # Request the larger artwork variant instead of the thumbnail.
        data['poster'] = results[0][1].replace('140x140', '600x600')
        html = read_url(data['link'], headers=itunes_headers, unicode=True)
        results = re.compile('video-preview-url="(.*?)"').findall(html)
        if results:
            data['trailer'] = results[0]

    # trailers section (preferred source for poster and trailer)
    host = 'http://trailers.apple.com'
    url = host + '/trailers/home/scripts/quickfind.php?callback=searchCallback&q=' + title
    # The slice strips the JSONP callback wrapper around the JSON payload.
    # NOTE(review): the fixed offsets assume the exact wrapper emitted by
    # the endpoint -- confirm against a live response.
    js = json.loads(read_url(url, unicode=True)[16:-4])
    results = js['results']
    if results:
        url = host + results[0]['location']
        if 'link' not in data:
            data['link'] = url
        headers = {
            'User-Agent': USER_AGENT
        }
        host_pattern = re.escape(host)
        html = read_url(url, headers=headers, unicode=True)
        results = re.compile('"(' + host_pattern + r'.*?poster\.jpg)"').findall(html)
        if results:
            data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg')
        html = read_url(url + 'includes/playlists/web.inc', headers=headers, unicode=True)
        results = re.compile('"(' + host_pattern + r'\S+\.mov)"').findall(html)
        if results:
            # The last .mov listed in the playlist is used as the trailer.
            data['trailer'] = results[-1]
    return data
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: print the lookup result for a few sample movies.
    sample_movies = (
        ('Alphaville', 'Jean-Luc Godard'),
        ('Sin City', 'Roberto Rodriguez'),
        ('Breathless', 'Jean-Luc Godard'),
        ('Capitalism: A Love Story', 'Michael Moore'),
        ('Film Socialisme', 'Jean-Luc Godard'),
    )
    for movie_title, movie_director in sample_movies:
        print(get_movie_data(movie_title, movie_director))
|