adding movies to itunes.py

This commit is contained in:
Rolux 2008-05-07 13:29:00 +02:00
parent d04877e1a2
commit b697803926

View File

@ -9,8 +9,8 @@ from oxutils.text import findString
# to sniff itunes traffic, use something like # to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net # sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1 # http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit
ITUNES_HEADERS = { ITUNES_HEADERS = {
'X-Apple-Tz': '0', 'X-Apple-Tz': '0',
@ -23,20 +23,38 @@ ITUNES_HEADERS = {
def composeUrl(request, parameters): def composeUrl(request, parameters):
if request == 'advancedSearch': if request == 'advancedSearch':
url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + urllib.urlencode({ url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
'albumTerm': parameters['title'], if parameters['media'] == 'music':
'allArtistNames': parameters['artist'], url += urllib.urlencode({
'composerTerm': '', 'albumTerm': parameters['title'],
'flavor': 0, 'allArtistNames': parameters['artist'],
'genreIndex': 1, 'composerTerm': '',
'media': 'music', 'flavor': 0,
'mediaType': 2, 'genreIndex': 1,
'ringtone': 0, 'media': 'music',
'searchButton': 'submit', 'mediaType': 2,
'songTerm': '', 'ringtone': 0,
}) 'searchButton': 'submit',
'songTerm': ''
})
elif parameters['media'] == 'movie':
url += urllib.urlencode({
'actorTerm': '',
'closedCaption': 0,
'descriptionTerm': '',
'directorProducerName': parameters['director'],
'flavor': 0,
'media': 'movie',
'mediaType': 3,
'movieTerm': parameters['title'],
'ratingIndex': 1,
'releaseYearTerm': '',
'searchButton': 'submit'
})
elif request == 'viewAlbum': elif request == 'viewAlbum':
url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id'] url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
elif request == 'viewMovie':
url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&s=143441' % parameters['id']
return url return url
def parseXmlDict(xml): def parseXmlDict(xml):
@ -57,15 +75,39 @@ def parseXmlDict(xml):
values[key] = value values[key] = value
return values return values
def parseCast(xml, title):
list = []
try:
strings = findRe(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
strings.pop()
for string in strings:
list.append(findRe(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
return list
except:
return list
def parseMovies(xml, title):
list = []
strings = findRe(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
strings.pop()
for string in strings:
list.append({
'id': findRe(string, 'viewMovie\?id=(.*?)&'),
'title': findRe(string, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
})
return list
class ItunesAlbum: class ItunesAlbum:
def __init__(self, title, artist): def __init__(self, id = '', title = '', artist = ''):
self.id = id
self.title = title self.title = title
self.artist = artist self.artist = artist
self.id = self.getId() if not id:
self.id = self.getId()
def getId(self): def getId(self):
url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist}) url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
xml = getUrl(url, None, ITUNES_HEADERS) xml = getUrl(url, headers = ITUNES_HEADERS)
id = findRe(xml, 'viewAlbum\?id=(.*?)&') id = findRe(xml, 'viewAlbum\?id=(.*?)&')
return id return id
@ -75,7 +117,7 @@ class ItunesAlbum:
xml = getUrl(url, None, ITUNES_HEADERS) xml = getUrl(url, None, ITUNES_HEADERS)
data['albumName'] = findRe(xml, '<B>(.*?)</B>') data['albumName'] = findRe(xml, '<B>(.*?)</B>')
data['artistName'] = findRe(xml, '<b>(.*?)</b>') data['artistName'] = findRe(xml, '<b>(.*?)</b>')
data['coverUrl'] = findRe(xml, 'reflection="1" url="(.*?)"') data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
data['genre'] = findRe(xml, 'Genre:(.*?)<') data['genre'] = findRe(xml, 'Genre:(.*?)<')
data['releaseDate'] = findRe(xml, 'Released(.*?)<') data['releaseDate'] = findRe(xml, 'Released(.*?)<')
data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>')) data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
@ -86,8 +128,48 @@ class ItunesAlbum:
data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<') data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<')
return data return data
class ItunesMovie:
def __init__(self, id = '', title = '', director = ''):
self.id = id
self.title = title
self.director = director
if not id:
self.id = self.getId()
def getId(self):
url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
xml = getUrl(url, headers = ITUNES_HEADERS)
id = findRe(xml, 'viewMovie\?id=(.*?)&')
return id
def getData(self):
data = {'id': self.id}
url = composeUrl('viewMovie', {'id': self.id})
xml = getUrl(url, None, ITUNES_HEADERS)
data['actors'] = parseCast(xml, 'actors')
string = findRe(xml, 'Average Rating:(.*?)</HBoxView>')
data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
data['directors'] = parseCast(xml, 'directors')
data['format'] = findRe(xml, 'Format:(.*?)<')
data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))
data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
data['producers'] = parseCast(xml, 'producers')
data['rated'] = findRe(xml, 'Rated(.*?)<')
data['relatedMovies'] = parseMovies(xml, 'related movies')
data['releaseDate'] = findRe(xml, 'Released(.*?)<')
data['runTime'] = findRe(xml, 'Run Time:(.*?)<')
data['screenwriters'] = parseCast(xml, 'screenwriters')
data['soundtrackId'] = findRe(xml, 'viewAlbum\?id=(.*?)&')
return data
if __name__ == '__main__': if __name__ == '__main__':
import simplejson import simplejson
data = ItunesAlbum('So Red the Rose', 'Arcadia').getData() data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
print simplejson.dumps(data, sort_keys = True, indent = 4) print simplejson.dumps(data, sort_keys = True, indent = 4)
data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
print simplejson.dumps(data, sort_keys = True, indent = 4)
for v in data['relatedMovies']:
data = ItunesMovie(id = v['id']).getData()
print simplejson.dumps(data, sort_keys = True, indent = 4)
# print test.getData() # print test.getData()