adding movies to itunes.py

2008-05-07 13:29:00 +02:00 · 2008-05-07 13:29:00 +02:00 · b697803926
commit b697803926
parent d04877e1a2
1 changed files with 102 additions and 20 deletions
--- a/ox/itunes.py
+++ b/ox/itunes.py
@ -9,8 +9,8 @@ from oxutils.text import findString
 # to sniff itunes traffic, use something like
 # sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net
-# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch%3Fmedia=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit&albumTerm=&pageName=Media+Search+Page-US-Music-PowerSearch&v2=PowerSearch&h5=appleitmsna%2Cappleitmsus&c2=PowerSearch HTTP/1.1
+# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
-
+# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit
 ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
@ -23,20 +23,38 @@ ITUNES_HEADERS = {
 def composeUrl(request, parameters):
  if request == 'advancedSearch':
-    url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?' + urllib.urlencode({
+    url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
-      'albumTerm': parameters['title'],
+    if parameters['media'] == 'music':
-      'allArtistNames': parameters['artist'],
+      url += urllib.urlencode({
-      'composerTerm': '',
+        'albumTerm': parameters['title'],
-      'flavor': 0,
+        'allArtistNames': parameters['artist'],
-      'genreIndex': 1,
+        'composerTerm': '',
-      'media': 'music',
+        'flavor': 0,
-      'mediaType': 2,
+        'genreIndex': 1,
-      'ringtone': 0,
+        'media': 'music',
-      'searchButton': 'submit',
+        'mediaType': 2,
-      'songTerm': '',
+        'ringtone': 0,
-    })
+        'searchButton': 'submit',
        'songTerm': ''
      })
    elif parameters['media'] == 'movie':
      url += urllib.urlencode({
        'actorTerm': '',
        'closedCaption': 0,
        'descriptionTerm': '',
        'directorProducerName': parameters['director'],
        'flavor': 0,
        'media': 'movie',
        'mediaType': 3,
        'movieTerm': parameters['title'],
        'ratingIndex': 1,
        'releaseYearTerm': '',
        'searchButton': 'submit'
      })
  elif request == 'viewAlbum':
    url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
  elif request == 'viewMovie':
    url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&s=143441' % parameters['id']
  return url
 def parseXmlDict(xml):
@ -57,15 +75,39 @@ def parseXmlDict(xml):
      values[key] = value
  return values
 def parseCast(xml, title):
  """Collect the names listed under one cast heading of a movie page.

  The heading searched for is ``title`` with its last character removed and
  then upper-cased (so 'actors' is looked up as 'ACTOR'); presumably this lets
  singular and plural headings both match -- TODO confirm against real pages.

  xml   -- page source to search
  title -- heading name, e.g. 'actors', 'directors', 'producers'

  Returns a list with one findRe() result per '</GotoURL>'-terminated entry
  of the matched section. Parsing is best effort: if anything goes wrong the
  names gathered so far are returned instead of raising.
  """
  names = []
  try:
    section = findRe(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper())
    entries = section.split('</GotoURL>')
    # The piece after the last '</GotoURL>' is trailing markup, not an entry.
    entries.pop()
    for entry in entries:
      names.append(findRe(entry, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
  except Exception:
    # Deliberately lenient, but unlike a bare ``except:`` this no longer
    # swallows KeyboardInterrupt / SystemExit.
    pass
  return names
 def parseMovies(xml, title):
  """Collect the movies listed under one heading of a movie page.

  The heading searched for is ``title`` with its last character removed and
  then upper-cased (so 'related movies' is looked up as 'RELATED MOVIE').

  xml   -- page source to search
  title -- heading name, e.g. 'related movies'

  Returns a list of dicts, one per '</GotoURL>'-terminated entry of the
  matched section, each with the keys 'id' and 'title'.
  """
  section = findRe(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper())
  entries = section.split('</GotoURL>')
  # The piece after the last '</GotoURL>' is trailing markup, not an entry.
  entries.pop()
  return [{
    # Raw string: '\?' must reach the regex engine as an escaped '?'.
    'id': findRe(entry, r'viewMovie\?id=(.*?)&'),
    'title': findRe(entry, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
  } for entry in entries]
 class ItunesAlbum:
-  def __init__(self, title, artist):
+  def __init__(self, id = '', title = '', artist = ''):
    self.id = id
    self.title = title
    self.artist = artist
-    self.id = self.getId()
+    if not id:
      self.id = self.getId()
  def getId(self):
-    url = composeUrl('advancedSearch', {'title': self.title, 'artist': self.artist})
+    url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
-    xml = getUrl(url, None, ITUNES_HEADERS)
+    xml = getUrl(url, headers = ITUNES_HEADERS)
    id = findRe(xml, 'viewAlbum\?id=(.*?)&')
    return id
@ -75,7 +117,7 @@ class ItunesAlbum:
    xml = getUrl(url, None, ITUNES_HEADERS)
    data['albumName'] = findRe(xml, '<B>(.*?)</B>')
    data['artistName'] = findRe(xml, '<b>(.*?)</b>')
-    data['coverUrl'] = findRe(xml, 'reflection="1" url="(.*?)"')
+    data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
    data['genre'] = findRe(xml, 'Genre:(.*?)<')
    data['releaseDate'] = findRe(xml, 'Released(.*?)<')
    data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
@ -86,8 +128,48 @@ class ItunesAlbum:
    data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<')
    return data
 class ItunesMovie:
  """A movie in the iTunes store, addressed by id or by title and director.

  When no id is given, the constructor runs an advanced search for the title
  and director and stores the id found by getId().
  """
  def __init__(self, id = '', title = '', director = ''):
    """Store the given fields; look the id up if it was not supplied.

    id       -- store id of the movie (the parameter name shadows the builtin
                but is kept because callers pass it by keyword)
    title    -- movie title used for the search
    director -- director name used for the search
    """
    self.id = id
    self.title = title
    self.director = director
    if not id:
      self.id = self.getId()
  def getId(self):
    """Run an advanced movie search and return the id extracted from it."""
    url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
    xml = getUrl(url, headers = ITUNES_HEADERS)
    # Raw string: '\?' must reach the regex engine as an escaped '?'.
    return findRe(xml, r'viewMovie\?id=(.*?)&')
  def getData(self):
    """Fetch the movie page and return its details as a dict.

    Keys: id, actors, averageRating, directors, format, genre, plotSummary,
    posterUrl, producers, rated, relatedMovies, releaseDate, runTime,
    screenwriters, soundtrackId.
    """
    data = {'id': self.id}
    url = composeUrl('viewMovie', {'id': self.id})
    # NOTE(review): getId() passes the headers by keyword while this call is
    # positional; both are kept as found -- confirm against getUrl's signature.
    xml = getUrl(url, None, ITUNES_HEADERS)
    data['actors'] = parseCast(xml, 'actors')
    # One point per star image plus half a point per '&#189;' entity found
    # between 'Average Rating:' and the closing '</HBoxView>'.
    ratingMarkup = findRe(xml, 'Average Rating:(.*?)</HBoxView>')
    data['averageRating'] = ratingMarkup.count('rating_star_000033.png') + ratingMarkup.count('&#189;') * 0.5
    data['directors'] = parseCast(xml, 'directors')
    data['format'] = findRe(xml, 'Format:(.*?)<')
    data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))
    data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
    data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
    data['producers'] = parseCast(xml, 'producers')
    data['rated'] = findRe(xml, 'Rated(.*?)<')
    data['relatedMovies'] = parseMovies(xml, 'related movies')
    data['releaseDate'] = findRe(xml, 'Released(.*?)<')
    data['runTime'] = findRe(xml, 'Run Time:(.*?)<')
    data['screenwriters'] = parseCast(xml, 'screenwriters')
    # Takes the 'viewAlbum?id=' value that findRe() extracts from the page;
    # presumably that link is the soundtrack -- TODO confirm.
    data['soundtrackId'] = findRe(xml, r'viewAlbum\?id=(.*?)&')
    return data
 if __name__ == '__main__':
  import simplejson
-  data = ItunesAlbum('So Red the Rose', 'Arcadia').getData()
+  data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
  print simplejson.dumps(data, sort_keys = True, indent = 4)
  data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
  print simplejson.dumps(data, sort_keys = True, indent = 4)
  for v in data['relatedMovies']:
    data = ItunesMovie(id = v['id']).getData()
    print simplejson.dumps(data, sort_keys = True, indent = 4)
  # print test.getData()