"""Look up album metadata on the iTunes Store.

The module talks to the legacy ``MZSearch`` / ``MZStore`` web-object
endpoints and extracts values from the returned XML with plain substring
searches (``findString`` from oxutils) rather than a real XML parser.
"""
import re
import urllib

from oxutils.cache import getUrl
from oxutils.text import findString

try:
    _urlencode = urllib.urlencode  # Python 2 location
except AttributeError:
    # Python 3 moved urlencode into urllib.parse.
    from urllib.parse import urlencode as _urlencode

# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net

# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1

# Request headers sent with every store request (passed to getUrl below).
ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
    'X-Apple-Storefront': '143441-1',
    'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
    'Accept-Language': 'en-us, en;q=0.50',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}


def composeUrl(request, parameters):
    """Return the store URL for ``request``.

    request    -- 'advancedSearch' or 'viewAlbum'
    parameters -- dict; 'advancedSearch' reads the keys 'title' and
                  'artist', 'viewAlbum' reads the key 'id'

    Raises ValueError for any other ``request`` (previously this fell
    through to ``return url`` and failed with UnboundLocalError).
    """
    if request == 'advancedSearch':
        query = _urlencode({
            'albumTerm': parameters['title'],
            'allArtistNames': parameters['artist'],
            'composerTerm': '',
            'flavor': 0,
            'genreIndex': 1,
            'media': 'music',
            'mediaType': 2,
            'ringtone': 0,
            'searchButton': 'submit',
            'songTerm': '',
        })
        return 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + query
    if request == 'viewAlbum':
        return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
    raise ValueError('unknown request: %r' % (request,))


def _decodeEntities(text):
    """Decode the apostrophe and ampersand entities in ``text``.

    Both the numeric and the named spelling are accepted. The ampersand is
    decoded last so that an escaped ampersand followed by '#39;' is not
    decoded twice.
    """
    for entity in ('&#39;', '&apos;'):
        text = text.replace(entity, '\'')
    for entity in ('&#38;', '&amp;'):
        text = text.replace(entity, '&')
    return text


def parseXmlDict(xml):
    """Parse the ``<key>name</key><type>value</type>`` pairs in ``xml``.

    Returns a dict mapping each key name to its value: True / False for
    ``<true/>`` / ``<false/>``, an int for ``<integer>``, an
    entity-decoded string for ``<string>``, and the raw inner text for
    any other element type.

    Assumes findString(s, a, b) returns the text of ``s`` between the
    markers ``a`` and ``b`` (an empty marker meaning start / end of
    ``s``) -- confirm against oxutils.text.
    """
    values = {}
    for chunk in xml.split('<key>'):
        # Text before the first <key> (or a chunk without a closing tag)
        # carries no key/value pair.
        if '</key>' not in chunk:
            continue
        key = findString(chunk, '', '</key>')
        kind = findString(chunk, '</key><', '>')
        if kind == 'true/':
            value = True
        elif kind == 'false/':
            value = False
        else:
            value = findString(chunk, '<%s>' % kind, '</%s>' % kind)
            if kind == 'integer':
                value = int(value)
            elif kind == 'string':
                value = _decodeEntities(value)
        values[key] = value
    return values


class ItunesAlbum:
    """An album on the iTunes Store, located by title and artist.

    Constructing an instance performs a store search (see getId) and keeps
    the result in ``self.id``.
    """

    def __init__(self, title, artist):
        self.title = title
        self.artist = artist
        self.id = self.getId()

    def getId(self):
        """Search the store for this album and return the id found in the
        first 'viewAlbum?id=' link of the response."""
        url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist})
        xml = getUrl(url, None, ITUNES_HEADERS)
        return findString(xml, 'viewAlbum?id=', '&')

    def getData(self):
        """Fetch the album page and return its metadata as a dict.

        Keys: 'id', 'albumName', 'artistName', 'coverUrl', 'genre',
        'releaseDate', 'review', 'tracks' (a list of dicts produced by
        parseXmlDict) and 'type'.
        """
        data = {'id': self.id}
        url = composeUrl('viewAlbum', {'id': self.id})
        xml = getUrl(url, None, ITUNES_HEADERS)
        # NOTE(review): several markers below are empty strings or bare
        # words; they look like markup delimiters that were lost from this
        # copy of the file. They are left untouched because the page
        # markup cannot be derived from this file -- restore them from a
        # pristine copy or a captured response.
        xml = findString(xml, '')
        # NOTE(review): albumName and artistName use identical markers, so
        # they currently yield the same value.
        data['albumName'] = findString(xml, '', '<')
        data['artistName'] = findString(xml, '', '<')
        data['coverUrl'] = findString(xml, 'reflection="1" url="', '"')
        data['genre'] = findString(xml, 'Genre: ', '<')
        data['releaseDate'] = findString(xml, 'Released', '<')
        data['review'] = findString(findString(xml, 'REVIEW'), '', '')
        # Track entries: skip to the first <dict> after 'items' and split
        # on <dict> so each piece holds one entry's key/value pairs. The
        # separator was an empty string, which str.split rejects with
        # ValueError.
        # NOTE(review): '<dict>' is inferred from the plist layout that
        # parseXmlDict consumes; the end marker of the 'items' section is
        # still missing, so the section runs to the end of the document.
        items = findString(findString(xml, 'items', ''), '<dict>')
        data['tracks'] = [parseXmlDict(entry) for entry in items.split('<dict>')]
        data['type'] = findString(xml, 'listType', '<')
        return data


if __name__ == '__main__':
    test = ItunesAlbum('So Red the Rose', 'Arcadia')
    print(test.getData())