diff --git a/ox/itunes.py b/ox/itunes.py index da9c6b3..7caca9c 100644 --- a/ox/itunes.py +++ b/ox/itunes.py @@ -9,8 +9,8 @@ from oxutils.text import findString # to sniff itunes traffic, use something like # sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net -# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1 - +# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit +# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit ITUNES_HEADERS = { 'X-Apple-Tz': '0', @@ -23,20 +23,38 @@ ITUNES_HEADERS = { def composeUrl(request, parameters): if request == 'advancedSearch': - url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + urllib.urlencode({ - 'albumTerm': parameters['title'], - 'allArtistNames': parameters['artist'], - 'composerTerm': '', - 'flavor': 0, - 'genreIndex': 1, - 'media': 'music', - 'mediaType': 2, - 'ringtone': 0, - 'searchButton': 'submit', - 'songTerm': '', - }) + url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + if parameters['media'] == 'music': + url += urllib.urlencode({ + 'albumTerm': parameters['title'], + 'allArtistNames': parameters['artist'], + 'composerTerm': '', + 'flavor': 0, + 'genreIndex': 1, + 'media': 'music', + 'mediaType': 2, + 'ringtone': 0, + 'searchButton': 'submit', + 'songTerm': '' + }) + elif parameters['media'] == 'movie': + url += urllib.urlencode({ + 'actorTerm': '', + 'closedCaption': 0, + 'descriptionTerm': '', + 'directorProducerName': parameters['director'], + 'flavor': 0, + 'media': 'movie', + 'mediaType': 3, + 'movieTerm': parameters['title'], + 'ratingIndex': 1, + 'releaseYearTerm': '', + 'searchButton': 'submit' + }) elif request == 'viewAlbum': url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id'] + elif request == 'viewMovie': + url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&s=143441' % parameters['id'] return url def parseXmlDict(xml): @@ -57,15 +75,39 @@ def parseXmlDict(xml): values[key] = value return values +def parseCast(xml, title): + list = [] + try: + strings = findRe(xml, '%s(.*?)' % title[:-1].upper()).split('') + strings.pop() + for string in strings: + list.append(findRe(string, '(.*?)')) + return list + except: + return list + +def parseMovies(xml, title): + list = [] + strings = findRe(xml, '%s(.*?)' % title[:-1].upper()).split('') + strings.pop() + for string in strings: + list.append({ + 'id': findRe(string, 'viewMovie\?id=(.*?)&'), + 'title': findRe(string, '(.*?)') + }) + return list + class ItunesAlbum: - def __init__(self, title, artist): + def __init__(self, id = '', title = '', artist = ''): + self.id = id self.title = title self.artist = artist - self.id = self.getId() + if not id: + self.id = self.getId() def getId(self): - url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist}) - xml = getUrl(url, None, ITUNES_HEADERS) + url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist}) + xml = getUrl(url, headers = ITUNES_HEADERS) id = findRe(xml, 'viewAlbum\?id=(.*?)&') return id @@ -75,7 +117,7 @@ class ItunesAlbum: xml = getUrl(url, None, ITUNES_HEADERS) data['albumName'] = findRe(xml, '(.*?)') data['artistName'] = findRe(xml, '(.*?)') - data['coverUrl'] = findRe(xml, 'reflection="1" url="(.*?)"') + data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"') data['genre'] = findRe(xml, 'Genre:(.*?)<') data['releaseDate'] = findRe(xml, 'Released(.*?)<') data['review'] = stripTags(findRe(xml, 'REVIEW.*?(.*?)')) @@ -86,8 +128,48 @@ class ItunesAlbum: data['type'] = findRe(xml, 'listType(.*?)<') return data +class ItunesMovie: + def __init__(self, id = '', title = '', director = ''): + self.id = id + self.title = title + self.director = director + if not id: + self.id = self.getId() + + def getId(self): + url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director}) + xml = getUrl(url, headers = ITUNES_HEADERS) + id = findRe(xml, 'viewMovie\?id=(.*?)&') + return id + + def getData(self): + data = {'id': self.id} + url = composeUrl('viewMovie', {'id': self.id}) + xml = getUrl(url, None, ITUNES_HEADERS) + data['actors'] = parseCast(xml, 'actors') + string = findRe(xml, 'Average Rating:(.*?)') + data['averageRating'] = string.count('rating_star_000033.png') + string.count('½') * 0.5 + data['directors'] = parseCast(xml, 'directors') + data['format'] = findRe(xml, 'Format:(.*?)<') + data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<')) + data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY.*?(.*?)')) + data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"') + data['producers'] = parseCast(xml, 'producers') + data['rated'] = findRe(xml, 'Rated(.*?)<') + data['relatedMovies'] = parseMovies(xml, 'related movies') + data['releaseDate'] = findRe(xml, 'Released(.*?)<') + data['runTime'] = findRe(xml, 'Run Time:(.*?)<') + data['screenwriters'] = parseCast(xml, 'screenwriters') + data['soundtrackId'] = findRe(xml, 'viewAlbum\?id=(.*?)&') + return data + if __name__ == '__main__': import simplejson - data = ItunesAlbum('So Red the Rose', 'Arcadia').getData() + data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData() print simplejson.dumps(data, sort_keys = True, indent = 4) + data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData() + print simplejson.dumps(data, sort_keys = True, indent = 4) + for v in data['relatedMovies']: + data = ItunesMovie(id = v['id']).getData() + print simplejson.dumps(data, sort_keys = True, indent = 4) # print test.getData() \ No newline at end of file