2008-04-29 13:08:23 +00:00
|
|
|
import re
|
|
|
|
import urllib
|
|
|
|
|
|
|
|
from oxutils.cache import getUrl
|
2008-05-07 08:15:25 +00:00
|
|
|
from oxutils.html import decodeHtml, stripTags
|
|
|
|
from oxutils.text import findRe
|
2008-04-29 13:08:23 +00:00
|
|
|
from oxutils.text import findString
|
|
|
|
|
|
|
|
# To sniff iTunes traffic, use something like:
|
|
|
|
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net
|
|
|
|
|
|
|
|
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1
|
|
|
|
|
|
|
|
|
|
|
|
# HTTP request headers passed to getUrl() for every iTunes Store request made
# by this module. The User-Agent identifies the client as iTunes 7.6.2.
ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
    # NOTE(review): presumably selects the store front (country) — confirm.
    'X-Apple-Storefront': '143441-1',
    'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
    'Accept-Language': 'en-us, en;q=0.50',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}
|
|
|
|
|
|
|
|
def composeUrl(request, parameters):
    """Build the iTunes Store URL for the given request type.

    request    -- 'advancedSearch' or 'viewAlbum'
    parameters -- dict of request arguments:
                  'advancedSearch' reads parameters['title'] and
                  parameters['artist']; 'viewAlbum' reads parameters['id']

    Returns the URL as a string.
    Raises ValueError if request is not one of the supported types
    (previously this surfaced as an UnboundLocalError on the return).
    """
    if request == 'advancedSearch':
        # Only the album title and artist vary; the remaining fields are
        # fixed values sent along with every search.
        query = urllib.urlencode({
            'albumTerm': parameters['title'],
            'allArtistNames': parameters['artist'],
            'composerTerm': '',
            'flavor': 0,
            'genreIndex': 1,
            'media': 'music',
            'mediaType': 2,
            'ringtone': 0,
            'searchButton': 'submit',
            'songTerm': '',
        })
        return 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + query
    if request == 'viewAlbum':
        return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
    raise ValueError('unknown request type: %r' % (request,))
|
|
|
|
|
|
|
|
def parseXmlDict(xml):
    """Parse a run of plist-style <key>name</key><type>value</type> pairs.

    xml -- string containing the pairs.

    Returns a dict mapping each key name to its value:
      * <true/> and <false/> become True and False,
      * <integer> values are converted with int(),
      * <string> values are passed through decodeHtml(),
      * any other element type keeps whatever findRe() extracted.
    """
    values = {}
    # Splitting on the opener means each chunk that holds a pair starts
    # with "name</key><type>...".
    for chunk in xml.split('<key>'):
        if '</key>' not in chunk:
            # Text ahead of the first <key> carries no pair.
            continue
        key = findRe(chunk, '(.*?)</key>')
        tag = findRe(chunk, '</key><(.*?)>')
        if tag == 'true/':
            value = True
        elif tag == 'false/':
            # <false/> is self-closing just like <true/>; it has no closing
            # tag, so the generic lookup below could never match it.
            value = False
        else:
            value = findRe(chunk, '<%s>(.*?)</%s>' % (tag, tag))
            if tag == 'integer':
                value = int(value)
            elif tag == 'string':
                value = decodeHtml(value)
        values[key] = value
    return values
|
|
|
|
|
|
|
|
class ItunesAlbum:
    """An album on the iTunes Store, looked up by title and artist.

    Creating an instance calls getId() right away, so construction
    already issues the search request.
    """

    def __init__(self, title, artist):
        self.title = title
        self.artist = artist
        # Resolved eagerly; getData() needs it to build the album URL.
        self.id = self.getId()

    def getId(self):
        """Search for the album and return the id captured from a
        'viewAlbum?id=...&' link in the response."""
        search_url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist})
        response = getUrl(search_url, None, ITUNES_HEADERS)
        return findRe(response, r'viewAlbum\?id=(.*?)&')

    def getData(self):
        """Fetch the album page for self.id and return its details.

        The result is a dict with the keys 'id', 'albumName', 'artistName',
        'coverUrl', 'genre', 'releaseDate', 'review', 'tracks' (a list with
        one parseXmlDict() result per '<dict>' chunk following the items
        key) and 'type'.
        """
        album_url = composeUrl('viewAlbum', {'id': self.id})
        page = getUrl(album_url, None, ITUNES_HEADERS)
        return {
            'id': self.id,
            'albumName': findRe(page, '<B>(.*?)</B>'),
            'artistName': findRe(page, '<b>(.*?)</b>'),
            'coverUrl': findRe(page, 'reflection="1" url="(.*?)"'),
            'genre': findRe(page, 'Genre:(.*?)<'),
            'releaseDate': findRe(page, 'Released(.*?)<'),
            'review': stripTags(findRe(page, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>')),
            # Everything after the first <dict> that follows the items key,
            # cut into one chunk per <dict> opener.
            'tracks': [
                parseXmlDict(chunk)
                for chunk in findRe(page, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
            ],
            'type': findRe(page, '<key>listType</key><string>(.*?)<'),
        }
|
|
|
|
|
2008-04-29 13:16:51 +00:00
|
|
|
if __name__ == '__main__':
    # Manual check when run as a script: look up one known album and
    # pretty-print everything getData() returns as JSON.
    import simplejson
    album = ItunesAlbum('So Red the Rose', 'Arcadia')
    print(simplejson.dumps(album.getData(), sort_keys=True, indent=4))
|