python-oxweb/ox/itunes.py

import re
import urllib

from oxutils.cache import getUrl
from oxutils.html import decodeHtml, stripTags
from oxutils.text import findRe
from oxutils.text import findString

# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net

# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit

# HTTP headers handed to getUrl() by every request this module makes.
# The User-Agent string presents the client as iTunes 7.6.2 on Mac OS X.
# NOTE(review): X-Apple-Storefront presumably selects which store/country
# is queried and X-Apple-Tz the client time zone -- confirm before changing.
ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
    'X-Apple-Storefront': '143441-1',
    'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
    'Accept-Language': 'en-us, en;q=0.50',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}

def composeUrl(request, parameters):
  """Build the URL for one of the supported iTunes requests.

  request selects the URL shape:
    'advancedSearch' -- parameters must contain 'media'; for 'music' it
                        must also contain 'title' and 'artist', for
                        'movie' it must contain 'title' and 'director'
    'viewAlbum'      -- parameters must contain 'id'
    'viewMovie'      -- parameters must contain 'id'

  Returns the URL as a string.

  Raises ValueError for an unknown request (this used to surface as an
  UnboundLocalError on the return statement) and KeyError when a required
  entry is missing from parameters.
  """
  if request == 'advancedSearch':
    url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
    media = parameters['media']
    if media == 'music':
      query = {
        'albumTerm': parameters['title'],
        'allArtistNames': parameters['artist'],
        'composerTerm': '',
        'flavor': 0,
        'genreIndex': 1,
        'media': 'music',
        'mediaType': 2,
        'ringtone': 0,
        'searchButton': 'submit',
        'songTerm': ''
      }
    elif media == 'movie':
      query = {
        'actorTerm': '',
        'closedCaption': 0,
        'descriptionTerm': '',
        'directorProducerName': parameters['director'],
        'flavor': 0,
        'media': 'movie',
        'mediaType': 3,
        'movieTerm': parameters['title'],
        'ratingIndex': 1,
        'releaseYearTerm': '',
        'searchButton': 'submit'
      }
    else:
      # Kept from the original behaviour: any other media value yields the
      # bare search URL without a query string.
      return url
    return url + urllib.urlencode(query)
  if request == 'viewAlbum':
    return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
  if request == 'viewMovie':
    return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
  raise ValueError('unknown iTunes request: %r' % (request,))

def parseXmlDict(xml):
  """Turn a run of plist-style <key>...</key><type>...</type> pairs into a dict.

  The text is split on '<key>'; every piece that contains '</key>' yields
  one entry. The element that directly follows '</key>' decides how the
  value is converted:
    <true/>    -> True
    <false/>   -> False (previously not recognised, so false flags were
                  lost and came back as a failed lookup)
    <integer>  -> int
    <string>   -> passed through decodeHtml()
    other tags -> whatever findRe() extracts from between the tags

  NOTE(review): relies on findRe(text, pattern) returning the first
  captured group -- confirm against oxutils.text.

  Returns a dict mapping key names to converted values.
  """
  values = {}
  for chunk in xml.split('<key>'):
    if '</key>' not in chunk:
      # text in front of the first <key> carries no entry
      continue
    key = findRe(chunk, '(.*?)</key>')
    tag = findRe(chunk, '</key><(.*?)>')
    if tag == 'true/':
      value = True
    elif tag == 'false/':
      value = False
    else:
      value = findRe(chunk, '<%s>(.*?)</%s>' % (tag, tag))
      if tag == 'integer':
        value = int(value)
      elif tag == 'string':
        value = decodeHtml(value)
    values[key] = value
  return values

def parseCast(xml, title):
  """Collect the names listed under one credits heading of a movie page.

  title is the plural heading in lower case (the callers in this file pass
  'actors', 'directors', 'producers', 'screenwriters'). Its last character
  is dropped and the rest upper-cased, so 'actors' searches for a heading
  that starts with 'ACTOR'.

  Returns a list of names. Parsing is best effort: if anything goes wrong
  the names gathered so far (possibly none) are returned instead of
  raising.
  """
  names = []
  try:
    heading = title[:-1].upper()
    section = findRe(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % heading)
    entries = section.split('</GotoURL>')
    # whatever follows the last </GotoURL> is not a cast entry
    entries.pop()
    for entry in entries:
      names.append(findRe(entry, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
  except Exception:
    # Deliberately tolerant, but no longer a bare "except:", so
    # KeyboardInterrupt and SystemExit still propagate.
    pass
  return names
def parseMovies(xml, title):
  """Extract the movies listed under one heading of a movie page.

  title is the plural heading in lower case (this file passes
  'related movies'); its last character is dropped and the rest
  upper-cased before it is matched against the page.

  Returns a list of dicts with the keys 'id' and 'title', one per
  '</GotoURL>'-terminated entry of the matched section.
  """
  heading = title[:-1].upper()
  section = findRe(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % heading)
  entries = section.split('</GotoURL>')
  # whatever follows the last </GotoURL> is not a movie entry
  entries.pop()
  return [
    {
      # raw string: '\?' is a regex escape, not a string escape
      'id': findRe(entry, r'viewMovie\?id=(.*?)&'),
      'title': findRe(entry, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
    }
    for entry in entries
  ]

class ItunesAlbum:
  """An album on the iTunes Store, addressed by id or by title and artist.

  When no id is given, the constructor runs an advanced search for the
  title/artist pair and keeps the first album id found in the result.
  """

  def __init__(self, id = '', title = '', artist = ''):
    self.id, self.title, self.artist = id, title, artist
    if self.id:
      return
    # no id supplied: look it up from title and artist
    self.id = self.getId()

  def getId(self):
    """Search for self.title / self.artist and return the first album id found."""
    searchUrl = composeUrl('advancedSearch', {
      'media': 'music',
      'title': self.title,
      'artist': self.artist
    })
    page = getUrl(searchUrl, headers = ITUNES_HEADERS)
    return findRe(page, 'viewAlbum\?id=(.*?)&')

  def getData(self):
    """Fetch the album page and return its scraped fields as a dict.

    The result has the keys 'id', 'albumName', 'artistName', 'coverUrl',
    'genre', 'releaseDate', 'review', 'tracks' (one parseXmlDict() result
    per '<dict>' piece of the items section) and 'type'.
    """
    page = getUrl(composeUrl('viewAlbum', {'id': self.id}), None, ITUNES_HEADERS)
    reviewXml = findRe(page, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>')
    itemsXml = findRe(page, '<key>items</key>.*?<dict>(.*?)$')
    return {
      'id': self.id,
      'albumName': findRe(page, '<B>(.*?)</B>'),
      'artistName': findRe(page, '<b>(.*?)</b>'),
      'coverUrl': findRe(page, 'reflection="." url="(.*?)"'),
      'genre': findRe(page, 'Genre:(.*?)<'),
      'releaseDate': findRe(page, 'Released(.*?)<'),
      'review': stripTags(reviewXml),
      'tracks': [parseXmlDict(item) for item in itemsXml.split('<dict>')],
      'type': findRe(page, '<key>listType</key><string>(.*?)<')
    }

class ItunesMovie:
  """A movie on the iTunes Store, addressed by id or by title and director.

  When no id is given, the constructor runs an advanced search for the
  title/director pair and keeps the first movie id found in the result.
  """

  def __init__(self, id = '', title = '', director = ''):
    self.id = id
    self.title = title
    self.director = director
    if not id:
      self.id = self.getId()

  def getId(self):
    """Search for self.title / self.director and return the first movie id found."""
    url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
    xml = getUrl(url, headers = ITUNES_HEADERS)
    # raw string: '\?' is a regex escape, not a string escape
    return findRe(xml, r'viewMovie\?id=(.*?)&')

  def getData(self):
    """Fetch the movie page and return its scraped fields as a dict.

    The result has the keys 'id', 'actors', 'averageRating', 'directors',
    'format', 'genre', 'plotSummary', 'posterUrl', 'producers', 'rated',
    'relatedMovies', 'releaseDate', 'runTime', 'screenwriters',
    'soundtrackId' and 'trailerUrl'.

    The page is no longer dumped to a hard-coded file on the original
    author's desktop; that debugging leftover made this method fail on
    every machine without that directory.
    """
    data = {'id': self.id}
    url = composeUrl('viewMovie', {'id': self.id})
    xml = getUrl(url, None, ITUNES_HEADERS)
    data['actors'] = parseCast(xml, 'actors')
    ratingXml = findRe(xml, 'Average Rating:(.*?)</HBoxView>')
    # one point per star image, half a point per "&#189;" entity
    data['averageRating'] = ratingXml.count('rating_star_000033.png') + ratingXml.count('&#189;') * 0.5
    data['directors'] = parseCast(xml, 'directors')
    data['format'] = findRe(xml, 'Format:(.*?)<')
    data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))
    data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
    data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
    data['producers'] = parseCast(xml, 'producers')
    data['rated'] = findRe(xml, 'Rated(.*?)<')
    data['relatedMovies'] = parseMovies(xml, 'related movies')
    data['releaseDate'] = findRe(xml, 'Released(.*?)<')
    data['runTime'] = findRe(xml, 'Run Time:(.*?)<')
    data['screenwriters'] = parseCast(xml, 'screenwriters')
    data['soundtrackId'] = findRe(xml, r'viewAlbum\?id=(.*?)&')
    data['trailerUrl'] = findRe(xml, 'autoplay="." url="(.*?)"')
    return data

if __name__ == '__main__':
  # Manual smoke test: look up one album and one movie by name, then every
  # movie the movie page lists as related, and pretty-print each result.
  import simplejson
  album = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
  print(simplejson.dumps(album, sort_keys = True, indent = 4))
  movie = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
  print(simplejson.dumps(movie, sort_keys = True, indent = 4))
  for related in movie['relatedMovies']:
    relatedData = ItunesMovie(id = related['id']).getData()
    print(simplejson.dumps(relatedData, sort_keys = True, indent = 4))
adding itunes.py 2008-04-29 13:08:23 +00:00			`import re`
			`import urllib`

			`from oxutils.cache import getUrl`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`from oxutils.html import decodeHtml, stripTags`
			`from oxutils.text import findRe`
adding itunes.py 2008-04-29 13:08:23 +00:00			`from oxutils.text import findString`

			`# to sniff itunes traffic, use something like`
			`# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net`

adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit`
			`# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit`
adding itunes.py 2008-04-29 13:08:23 +00:00
			`ITUNES_HEADERS = {`
			`'X-Apple-Tz': '0',`
			`'X-Apple-Storefront': '143441-1',`
			`'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',`
			`'Accept-Language': 'en-us, en;q=0.50',`
			`'Accept-Encoding': 'gzip',`
			`'Connection': 'close',`
			`}`

			`def composeUrl(request, parameters):`
			`if request == 'advancedSearch':`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'`
			`if parameters['media'] == 'music':`
			`url += urllib.urlencode({`
			`'albumTerm': parameters['title'],`
			`'allArtistNames': parameters['artist'],`
			`'composerTerm': '',`
			`'flavor': 0,`
			`'genreIndex': 1,`
			`'media': 'music',`
			`'mediaType': 2,`
			`'ringtone': 0,`
			`'searchButton': 'submit',`
			`'songTerm': ''`
			`})`
			`elif parameters['media'] == 'movie':`
			`url += urllib.urlencode({`
			`'actorTerm': '',`
			`'closedCaption': 0,`
			`'descriptionTerm': '',`
			`'directorProducerName': parameters['director'],`
			`'flavor': 0,`
			`'media': 'movie',`
			`'mediaType': 3,`
			`'movieTerm': parameters['title'],`
			`'ratingIndex': 1,`
			`'releaseYearTerm': '',`
			`'searchButton': 'submit'`
			`})`
adding itunes.py 2008-04-29 13:08:23 +00:00			`elif request == 'viewAlbum':`
			`url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`elif request == 'viewMovie':`
adding movie trailers to itunes.py 2008-05-07 11:54:37 +00:00			`url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']`
adding itunes.py 2008-04-29 13:08:23 +00:00			`return url`

			`def parseXmlDict(xml):`
			`values = {}`
			`strings = xml.split('<key>')`
			`for string in strings:`
			`if string.find('</key>') != -1:`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`key = findRe(string, '(.*?)</key>')`
			`type = findRe(string, '</key><(.*?)>')`
adding itunes.py 2008-04-29 13:08:23 +00:00			`if type == 'true/':`
			`value = True`
			`else:`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`value = findRe(string, '<%s>(.*?)</%s>' % (type, type))`
adding itunes.py 2008-04-29 13:08:23 +00:00			`if type == 'integer':`
			`value = int(value)`
			`elif type == 'string':`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`value = decodeHtml(value)`
adding itunes.py 2008-04-29 13:08:23 +00:00			`values[key] = value`
			`return values`

adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`def parseCast(xml, title):`
			`list = []`
			`try:`
			`strings = findRe(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')`
			`strings.pop()`
			`for string in strings:`
			`list.append(findRe(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
			`return list`
			`except:`
			`return list`

			`def parseMovies(xml, title):`
			`list = []`
			`strings = findRe(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')`
			`strings.pop()`
			`for string in strings:`
			`list.append({`
			`'id': findRe(string, 'viewMovie\?id=(.*?)&'),`
			`'title': findRe(string, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')`
			`})`
			`return list`

adding itunes.py 2008-04-29 13:08:23 +00:00			`class ItunesAlbum:`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`def __init__(self, id = '', title = '', artist = ''):`
			`self.id = id`
adding itunes.py 2008-04-29 13:08:23 +00:00			`self.title = title`
			`self.artist = artist`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`if not id:`
			`self.id = self.getId()`
adding itunes.py 2008-04-29 13:08:23 +00:00
			`def getId(self):`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})`
			`xml = getUrl(url, headers = ITUNES_HEADERS)`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`id = findRe(xml, 'viewAlbum\?id=(.*?)&')`
adding itunes.py 2008-04-29 13:08:23 +00:00			`return id`

			`def getData(self):`
			`data = {'id': self.id}`
			`url = composeUrl('viewAlbum', {'id': self.id})`
			`xml = getUrl(url, None, ITUNES_HEADERS)`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`data['albumName'] = findRe(xml, '<B>(.*?)</B>')`
			`data['artistName'] = findRe(xml, '<b>(.*?)</b>')`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`data['genre'] = findRe(xml, 'Genre:(.*?)<')`
			`data['releaseDate'] = findRe(xml, 'Released(.*?)<')`
			`data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
adding itunes.py 2008-04-29 13:08:23 +00:00			`data['tracks'] = []`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`strings = findRe(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')`
adding itunes.py 2008-04-29 13:08:23 +00:00			`for string in strings:`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`data['tracks'].append(parseXmlDict(string))`
			`data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<')`
adding itunes.py 2008-04-29 13:08:23 +00:00			`return data`

adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`class ItunesMovie:`
			`def __init__(self, id = '', title = '', director = ''):`
			`self.id = id`
			`self.title = title`
			`self.director = director`
			`if not id:`
			`self.id = self.getId()`

			`def getId(self):`
			`url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})`
			`xml = getUrl(url, headers = ITUNES_HEADERS)`
			`id = findRe(xml, 'viewMovie\?id=(.*?)&')`
			`return id`

			`def getData(self):`
			`data = {'id': self.id}`
			`url = composeUrl('viewMovie', {'id': self.id})`
			`xml = getUrl(url, None, ITUNES_HEADERS)`
adding movie trailers to itunes.py 2008-05-07 11:54:37 +00:00			`f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')`
			`f.write(xml)`
			`f.close()`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`data['actors'] = parseCast(xml, 'actors')`
			`string = findRe(xml, 'Average Rating:(.*?)</HBoxView>')`
			`data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5`
			`data['directors'] = parseCast(xml, 'directors')`
			`data['format'] = findRe(xml, 'Format:(.*?)<')`
			`data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))`
			`data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
			`data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')`
			`data['producers'] = parseCast(xml, 'producers')`
			`data['rated'] = findRe(xml, 'Rated(.*?)<')`
			`data['relatedMovies'] = parseMovies(xml, 'related movies')`
			`data['releaseDate'] = findRe(xml, 'Released(.*?)<')`
			`data['runTime'] = findRe(xml, 'Run Time:(.*?)<')`
			`data['screenwriters'] = parseCast(xml, 'screenwriters')`
			`data['soundtrackId'] = findRe(xml, 'viewAlbum\?id=(.*?)&')`
adding movie trailers to itunes.py 2008-05-07 11:54:37 +00:00			`data['trailerUrl'] = findRe(xml, 'autoplay="." url="(.*?)"')`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`return data`

some more itunes.py 2008-04-29 13:16:51 +00:00			`if __name__ == '__main__':`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`import simplejson`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()`
			`print simplejson.dumps(data, sort_keys = True, indent = 4)`
			`data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`print simplejson.dumps(data, sort_keys = True, indent = 4)`
adding movies to itunes.py 2008-05-07 11:29:00 +00:00			`for v in data['relatedMovies']:`
			`data = ItunesMovie(id = v['id']).getData()`
			`print simplejson.dumps(data, sort_keys = True, indent = 4)`
itunes.py: getting rid of findString() 2008-05-07 08:15:25 +00:00			`# print test.getData()`