# python-oxweb/ox/itunes.py

import re
import urllib

from oxutils.cache import getUrl
from oxutils.text import findString

# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net

# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1


# HTTP request headers that ItunesAlbum passes to getUrl() for every request
# it makes. The User-Agent value makes the request identify itself as the
# iTunes 7.6.2 desktop client.
# NOTE(review): the X-Apple-* values were presumably copied from sniffed
# iTunes traffic (see the tcpdump hint above) -- confirm before changing them.
ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
    # presumably selects the store/country to query -- TODO confirm
    'X-Apple-Storefront': '143441-1',
    'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
    'Accept-Language': 'en-us, en;q=0.50',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}

def composeUrl(request, parameters):
  """Build the iTunes Store URL for a given request type.

  request -- 'advancedSearch' or 'viewAlbum'.
  parameters -- dict with the values the request type needs:
    'advancedSearch' reads parameters['title'] and parameters['artist'],
    'viewAlbum' reads parameters['id'].

  Returns the URL as a string.
  Raises ValueError for an unknown request type (previously this surfaced
  as an UnboundLocalError on the return statement) and KeyError when a
  required entry is missing from parameters.
  """
  # urlencode lives in urllib on Python 2 and in urllib.parse on Python 3;
  # importing it here keeps the function usable on both.
  try:
    from urllib import urlencode
  except ImportError:
    from urllib.parse import urlencode

  if request == 'advancedSearch':
    query = urlencode({
      'albumTerm': parameters['title'],
      'allArtistNames': parameters['artist'],
      'composerTerm': '',
      'flavor': 0,
      'genreIndex': 1,
      'media': 'music',
      'mediaType': 2,
      'ringtone': 0,
      'searchButton': 'submit',
      'songTerm': '',
    })
    return 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + query
  if request == 'viewAlbum':
    return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
  raise ValueError('unknown request type: %r' % (request,))

def parseXmlDict(xml):
  """Parse a flat run of plist-style <key>...</key><type>...</type> pairs.

  xml -- string containing zero or more '<key>name</key><type>value</type>'
    entries (nested structures are not handled).

  Returns a dict mapping each key name to its value:
    <true/> becomes True and <false/> becomes False,
    <integer> values are converted with int(),
    <string> values have the '&#38;' and '&#39;' entities decoded,
    any other type is stored as the text findString() extracts.
  Raises ValueError if an <integer> value is not a valid integer literal.

  NOTE(review): relies on oxutils' findString(string, start, end), which
  presumably returns the text between the two delimiters -- confirm.
  """
  values = {}
  for chunk in xml.split('<key>'):
    # Pieces without a closing tag (e.g. the text before the first <key>)
    # carry no entry.
    if '</key>' not in chunk:
      continue
    key = findString(chunk, '', '</key>')
    # Name of the element that directly follows the key, e.g. 'string',
    # 'integer' or the self-closing 'true/' / 'false/'.
    tag = findString(chunk, '</key><', '>')
    if tag == 'true/':
      value = True
    elif tag == 'false/':
      # Self-closing like <true/>: there is no closing tag to search for.
      value = False
    else:
      value = findString(chunk, '<%s>' % tag, '</%s>' % tag)
      if tag == 'integer':
        value = int(value)
      elif tag == 'string':
        value = value.replace('&#38;', '&').replace('&#39;', '\'')
    values[key] = value
  return values

class ItunesAlbum(object):
  """An album looked up in the iTunes Store by title and artist.

  Constructing an instance immediately calls getId(), which requests the
  advanced-search URL through getUrl(); getData() issues a second request
  for the album page.

  Attributes:
    title, artist -- the search terms given to the constructor.
    id -- whatever findString() extracts after 'viewAlbum?id=' in the
      search response (presumably empty when nothing matched -- confirm
      against findString).
  """

  def __init__(self, title, artist):
    self.title = title
    self.artist = artist
    self.id = self.getId()

  def getId(self):
    """Search the store for title/artist and return the first album id found."""
    url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist})
    xml = getUrl(url, None, ITUNES_HEADERS)
    return findString(xml, 'viewAlbum?id=', '&')

  def getData(self):
    """Fetch the album page for self.id and return its details as a dict.

    The dict has the keys 'id', 'albumName', 'artistName', 'coverUrl',
    'genre', 'releaseDate', 'review', 'tracks' (a list of dicts produced
    by parseXmlDict, one per track entry) and 'type'.
    """
    data = {'id': self.id}
    url = composeUrl('viewAlbum', {'id': self.id})
    xml = getUrl(url, None, ITUNES_HEADERS)
    # Only the part of the response after the <View> tag is scraped.
    xml = findString(xml, '<View>')
    data['albumName'] = findString(xml, '<B>', '<')
    data['artistName'] = findString(xml, '<b>', '<')
    data['coverUrl'] = findString(xml, 'reflection="1" url="', '"')
    data['genre'] = findString(xml, 'Genre: ', '<')
    data['releaseDate'] = findString(xml, 'Released', '<')
    reviewSection = findString(xml, 'REVIEW</b>')
    data['review'] = findString(reviewSection, '<SetFontStyle normalStyle="textColor">', '</SetFontStyle>')
    data['tracks'] = []
    # The track entries are the <dict> elements of the 'items' array.
    items = findString(xml, '<key>items</key>', '</array>')
    fragments = findString(items, '<dict>').split('<dict>')
    for fragment in fragments:
      track = parseXmlDict(fragment)
      # An empty fragment (no items found) parses to {}; do not report it
      # as a track.
      if track:
        data['tracks'].append(track)
    data['type'] = findString(xml, '<key>listType</key><string>', '<')
    return data

if __name__ == '__main__':
  # Manual smoke test: looks up one known album and dumps the scraped data.
  # Performs real network requests through ItunesAlbum.
  album = ItunesAlbum('So Red the Rose', 'Arcadia')
  # Single-argument print(...) behaves the same on Python 2 and Python 3,
  # whereas the print statement is a SyntaxError on Python 3.
  print(album.getData())