add parser for trailers.apple.com
This commit is contained in:
parent
faeed07934
commit
9c5f5097c7
1 changed file with 32 additions and 8 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ox.cache import readUrlUnicode
|
from ox.cache import readUrlUnicode
|
||||||
|
@ -10,9 +11,12 @@ HEADERS = {
|
||||||
'X-Apple-Tz': '7200',
|
'X-Apple-Tz': '7200',
|
||||||
'Accept-Encoding': 'gzip, deflate'
|
'Accept-Encoding': 'gzip, deflate'
|
||||||
}
|
}
|
||||||
|
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
|
||||||
|
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
|
||||||
|
|
||||||
def getMovieData(title, director):
|
def getMovieData(title, director):
|
||||||
data = None
|
data = {}
|
||||||
|
# itunes section (preferred source for link)
|
||||||
url = 'http://ax.search.itunes.apple.com/WebObjects/MZSearch.woa/wa/advancedSearch'
|
url = 'http://ax.search.itunes.apple.com/WebObjects/MZSearch.woa/wa/advancedSearch'
|
||||||
url += '?media=movie&movieTerm=' + title
|
url += '?media=movie&movieTerm=' + title
|
||||||
url += '&actorNames=&directorProducerName=' + director
|
url += '&actorNames=&directorProducerName=' + director
|
||||||
|
@ -23,17 +27,37 @@ def getMovieData(title, director):
|
||||||
regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>'
|
regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>'
|
||||||
results = re.compile(regexp).findall(html)
|
results = re.compile(regexp).findall(html)
|
||||||
if results:
|
if results:
|
||||||
data = {
|
data['link'] = results[0][0]
|
||||||
'link': results[0][0],
|
data['poster'] = results[0][1].replace('140x140', '600x600')
|
||||||
'poster': results[0][1].replace('140x140', '600x600')
|
|
||||||
}
|
|
||||||
html = readUrlUnicode(data['link'], headers=HEADERS)
|
html = readUrlUnicode(data['link'], headers=HEADERS)
|
||||||
regexp = 'video-preview-url="(.*?)"'
|
results = re.compile('video-preview-url="(.*?)"').findall(html)
|
||||||
results = re.compile(regexp).findall(html)
|
|
||||||
if results:
|
if results:
|
||||||
data['trailer'] = results[0]
|
data['trailer'] = results[0]
|
||||||
|
# trailers section (preferred source for poster and trailer)
|
||||||
|
host = 'http://trailers.apple.com'
|
||||||
|
url = host + '/trailers/home/scripts/quickfind.php?callback=searchCallback&q=' + title
|
||||||
|
js = json.loads(readUrlUnicode(url)[16:-4])
|
||||||
|
results = js['results']
|
||||||
|
if results:
|
||||||
|
url = host + results[0]['location']
|
||||||
|
if not 'link' in data:
|
||||||
|
data['link'] = url
|
||||||
|
headers = {
|
||||||
|
'User-Agent': USER_AGENT
|
||||||
|
}
|
||||||
|
html = readUrlUnicode(url, headers=headers)
|
||||||
|
results = re.compile('"(' + host + '.*?poster\.jpg)"').findall(html)
|
||||||
|
if results:
|
||||||
|
data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg')
|
||||||
|
html = readUrlUnicode(url + 'includes/playlists/web.inc', headers=headers)
|
||||||
|
results = re.compile('"(' + host + '\S+\.mov)"').findall(html)
|
||||||
|
if results:
|
||||||
|
data['trailer'] = results[-1]
|
||||||
return data
|
return data
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print getMovieData('Alphaville', 'Jean-Luc Godard')
|
print getMovieData('Alphaville', 'Jean-Luc Godard')
|
||||||
print getMovieData('Sin City', 'Roberto Rodriguez')
|
print getMovieData('Sin City', 'Roberto Rodriguez')
|
||||||
|
print getMovieData('Breathless', 'Jean-Luc Godard')
|
||||||
|
print getMovieData('Capitalism: A Love Story', 'Michael Moore')
|
||||||
|
print getMovieData('Film Socialisme', 'Jean-Luc Godard')
|
||||||
|
|
Loading…
Reference in a new issue