python-ox/ox/web/itunes.py

# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
from __future__ import print_function
import re
from urllib.parse import urlencode

from ox.cache import read_url
from ox.html import decode_html, strip_tags
from ox.text import find_re
from ox.text import find_string


# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net

# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit

# HTTP request headers passed to read_url() for every request this module
# makes. The User-Agent string presents the client as iTunes 7.6.2 on Mac OS X.
# NOTE(review): 'X-Apple-Storefront' presumably selects the regional store;
# confirm before changing the value.
ITUNES_HEADERS = dict((
    ('X-Apple-Tz', '0'),
    ('X-Apple-Storefront', '143441-1'),
    ('User-Agent', 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)'),
    ('Accept-Language', 'en-us, en;q=0.50'),
    ('Accept-Encoding', 'gzip'),
    ('Connection', 'close'),
))

def compose_url(request, parameters):
    """Build the iTunes Store URL for the given request.

    request    -- 'advancedSearch', 'viewAlbum' or 'viewMovie'
    parameters -- dict whose required keys depend on the request:
                  'advancedSearch': 'media' ('music' or 'movie') and 'title',
                  plus 'artist' for music or 'director' for movie;
                  'viewAlbum' / 'viewMovie': 'id'

    Returns the URL as a string. For 'advancedSearch' with a 'media' value
    other than 'music' or 'movie', the bare search URL (ending in '?') is
    returned without a query string.

    Raises ValueError if request is not one of the three known names, and
    KeyError if a required parameter is missing.
    """
    if request == 'advancedSearch':
        url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
        media = parameters['media']
        if media == 'music':
            url += urlencode({
                'albumTerm': parameters['title'],
                'allArtistNames': parameters['artist'],
                'composerTerm': '',
                'flavor': 0,
                'genreIndex': 1,
                'media': 'music',
                'mediaType': 2,
                'ringtone': 0,
                'searchButton': 'submit',
                'songTerm': ''
            })
        elif media == 'movie':
            url += urlencode({
                'actorTerm': '',
                'closedCaption': 0,
                'descriptionTerm': '',
                'directorProducerName': parameters['director'],
                'flavor': 0,
                'media': 'movie',
                'mediaType': 3,
                'movieTerm': parameters['title'],
                'ratingIndex': 1,
                'releaseYearTerm': '',
                'searchButton': 'submit'
            })
        return url
    if request == 'viewAlbum':
        return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
    if request == 'viewMovie':
        return 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
    # Fail with a clear message instead of falling through to an unbound local.
    raise ValueError('unknown request: %r' % (request,))

def parse_xml_dict(xml):
    """Parse the <key>/value pairs of a plist-style XML fragment into a dict.

    The fragment is split on '<key>'; every piece that contains '</key>'
    contributes one entry. The element tag right after '</key>' decides how
    the value is converted:

      <true/>    -> True
      <false/>   -> False
      <integer>  -> int
      <string>   -> decode_html() of the element text
      other tags -> the element text as returned by find_re()

    NOTE(review): relies on find_re() returning the first capture group, and
    an empty value when nothing matches -- confirm against ox.text. If an
    <integer> element yields text that int() cannot parse, ValueError
    propagates to the caller.
    """
    values = {}
    for chunk in xml.split('<key>'):
        if '</key>' not in chunk:
            # Text before the first <key>, or a piece without a complete key
            continue
        key = find_re(chunk, '(.*?)</key>')
        tag = find_re(chunk, '</key><(.*?)>')
        if tag == 'true/':
            value = True
        elif tag == 'false/':
            # Self-closing like <true/>; there is no </false/> closing tag
            # to search for, so handle it before the generic lookup.
            value = False
        else:
            value = find_re(chunk, '<%s>(.*?)</%s>' % (tag, tag))
            if tag == 'integer':
                value = int(value)
            elif tag == 'string':
                value = decode_html(value)
        values[key] = value
    return values

def parse_cast(xml, title):
    """Extract the names listed under a cast/crew heading of a movie page.

    xml   -- page source as returned by read_url()
    title -- heading name such as 'actors' or 'directors'; its last character
             is dropped and the rest upper-cased before matching
             (presumably so both singular and plural headings match -- TODO
             confirm against a live page)

    Returns a list of names. Parsing is best-effort: if anything goes wrong
    the names collected so far (possibly none) are returned instead of
    raising.
    """
    names = []
    try:
        section = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper())
        entries = section.split('</GotoURL>')
        # The piece after the last </GotoURL> is trailing markup, not an entry
        entries.pop()
        for entry in entries:
            names.append(find_re(entry, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
    except Exception:
        # Deliberately best-effort; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return names

def parse_movies(xml, title):
    """Extract the movies listed under a heading of a movie page.

    xml   -- page source as returned by read_url()
    title -- heading name such as 'related movies'; its last character is
             dropped and the rest upper-cased before matching

    Returns a list of dicts with the keys 'id' and 'title'. Parsing is
    best-effort: if anything goes wrong the entries collected so far
    (possibly none) are returned instead of raising.
    """
    movies = []
    try:
        section = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper())
        entries = section.split('</GotoURL>')
        # The piece after the last </GotoURL> is trailing markup, not an entry
        entries.pop()
        for entry in entries:
            movies.append({
                'id': find_re(entry, r'viewMovie\?id=(.*?)&'),
                'title': find_re(entry, r'<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
            })
    except Exception:
        # Deliberately best-effort; unlike a bare except this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return movies

class ItunesAlbum:
    """An iTunes Store album, addressed by store id or by title and artist."""

    def __init__(self, id = '', title = '', artist = ''):
        """Store the arguments; without an id, look one up via get_id()."""
        self.id, self.title, self.artist = id, title, artist
        if not self.id:
            self.id = self.get_id()

    def get_id(self):
        """Run an advanced music search for title/artist and return the
        first album id found in the response."""
        query = {'media': 'music', 'title': self.title, 'artist': self.artist}
        response = read_url(compose_url('advancedSearch', query), headers = ITUNES_HEADERS)
        return find_re(response, r'viewAlbum\?id=(.*?)&')

    def get_data(self):
        """Fetch the album page and return its metadata as a dict.

        Keys: id, albumName, artistName, coverUrl, genre, releaseDate,
        review, tracks (one parse_xml_dict() result per '<dict>' piece of
        the items section) and type.
        """
        xml = read_url(compose_url('viewAlbum', {'id': self.id}), None, ITUNES_HEADERS)
        return {
            'id': self.id,
            'albumName': find_re(xml, '<B>(.*?)</B>'),
            'artistName': find_re(xml, '<b>(.*?)</b>'),
            'coverUrl': find_re(xml, 'reflection="." url="(.*?)"'),
            'genre': find_re(xml, 'Genre:(.*?)<'),
            'releaseDate': find_re(xml, 'Released(.*?)<'),
            'review': strip_tags(find_re(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>')),
            'tracks': [
                parse_xml_dict(item)
                for item in find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
            ],
            'type': find_re(xml, '<key>listType</key><string>(.*?)<'),
        }

class ItunesMovie:
    """An iTunes Store movie, addressed by store id or by title and director."""

    def __init__(self, id = '', title = '', director = ''):
        """Store the arguments; without an id, look one up via get_id()."""
        self.id = id
        self.title = title
        self.director = director
        if not id:
            self.id = self.get_id()

    def get_id(self):
        """Run an advanced movie search for title/director and return the
        first movie id found in the response."""
        url = compose_url('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
        xml = read_url(url, headers = ITUNES_HEADERS)
        return find_re(xml, r'viewMovie\?id=(.*?)&')

    def get_data(self):
        """Fetch the movie page and return its metadata as a dict.

        Keys: id, actors, averageRating, directors, format, genre,
        plotSummary, posterUrl, producers, rated, relatedMovies, releaseDate,
        runTime, screenwriters, soundtrackId and trailerUrl.

        The response is no longer dumped to a hard-coded debug file under
        /Users/rolux/Desktop; that write failed wherever the directory does
        not exist.
        """
        url = compose_url('viewMovie', {'id': self.id})
        xml = read_url(url, None, ITUNES_HEADERS)
        data = {'id': self.id}
        data['actors'] = parse_cast(xml, 'actors')
        # One point per full-star image plus half a point per '&#189;' entity
        rating = find_re(xml, 'Average Rating:(.*?)</HBoxView>')
        data['averageRating'] = rating.count('rating_star_000033.png') + rating.count('&#189;') * 0.5
        data['directors'] = parse_cast(xml, 'directors')
        data['format'] = find_re(xml, 'Format:(.*?)<')
        data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))
        data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
        data['producers'] = parse_cast(xml, 'producers')
        data['rated'] = find_re(xml, 'Rated(.*?)<')
        data['relatedMovies'] = parse_movies(xml, 'related movies')
        data['releaseDate'] = find_re(xml, 'Released(.*?)<')
        data['runTime'] = find_re(xml, 'Run Time:(.*?)<')
        data['screenwriters'] = parse_cast(xml, 'screenwriters')
        data['soundtrackId'] = find_re(xml, r'viewAlbum\?id=(.*?)&')
        data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')
        return data

if __name__ == '__main__':
    # Manual smoke test: fetches one album and several movies from the store
    # and pretty-prints each result as JSON.
    from ox.utils import json

    def _show(info):
        print(json.dumps(info, sort_keys = True, indent = 4))

    _show(ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data())
    matrix = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
    _show(matrix)
    # Follow every related movie listed on the first movie's page
    for related in matrix['relatedMovies']:
        _show(ItunesMovie(id = related['id']).get_data())
    _show(ItunesMovie(id='272960052').get_data())
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`# vi:si:et:sw=4:sts=4:ts=4`
			`# encoding: utf-8`
from __futre__ import print_function 2014-09-30 19:27:26 +00:00			`from __future__ import print_function`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`import re`
drop six and python2 support 2023-07-27 11:07:13 +00:00			`from urllib.parse import urlencode`
add ox.web to this repos 2010-07-07 23:25:57 +00:00
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`from ox.cache import read_url`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`from ox.html import decode_html, strip_tags`
			`from ox.text import find_re`
			`from ox.text import find_string`
add ox.web to this repos 2010-07-07 23:25:57 +00:00

			`# to sniff itunes traffic, use something like`
			`# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net`

			`# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit`
			`# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit`

			`ITUNES_HEADERS = {`
			`'X-Apple-Tz': '0',`
			`'X-Apple-Storefront': '143441-1',`
			`'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',`
			`'Accept-Language': 'en-us, en;q=0.50',`
			`'Accept-Encoding': 'gzip',`
			`'Connection': 'close',`
			`}`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def compose_url(request, parameters):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`if request == 'advancedSearch':`
			`url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'`
			`if parameters['media'] == 'music':`
more urlencode 2014-10-05 17:54:13 +00:00			`url += urlencode({`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`'albumTerm': parameters['title'],`
			`'allArtistNames': parameters['artist'],`
			`'composerTerm': '',`
			`'flavor': 0,`
			`'genreIndex': 1,`
			`'media': 'music',`
			`'mediaType': 2,`
			`'ringtone': 0,`
			`'searchButton': 'submit',`
			`'songTerm': ''`
			`})`
			`elif parameters['media'] == 'movie':`
more urlencode 2014-10-05 17:54:13 +00:00			`url += urlencode({`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`'actorTerm': '',`
			`'closedCaption': 0,`
			`'descriptionTerm': '',`
			`'directorProducerName': parameters['director'],`
			`'flavor': 0,`
			`'media': 'movie',`
			`'mediaType': 3,`
			`'movieTerm': parameters['title'],`
			`'ratingIndex': 1,`
			`'releaseYearTerm': '',`
			`'searchButton': 'submit'`
			`})`
			`elif request == 'viewAlbum':`
			`url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']`
			`elif request == 'viewMovie':`
			`url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']`
			`return url`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def parse_xml_dict(xml):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`values = {}`
			`strings = xml.split('<key>')`
			`for string in strings:`
			`if string.find('</key>') != -1:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`key = find_re(string, '(.*?)</key>')`
			`type = find_re(string, '</key><(.*?)>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`if type == 'true/':`
			`value = True`
			`else:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`value = find_re(string, '<%s>(.*?)</%s>' % (type, type))`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`if type == 'integer':`
			`value = int(value)`
			`elif type == 'string':`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`value = decode_html(value)`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`values[key] = value`
			`return values`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def parse_cast(xml, title):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`list = []`
			`try:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`strings = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`strings.pop()`
			`for string in strings:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`list.append(find_re(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return list`
			`except:`
			`return list`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def parse_movies(xml, title):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`list = []`
			`try:`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`strings = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`strings.pop()`
			`for string in strings:`
			`list.append({`
escape strings 2024-09-11 21:52:01 +00:00			`'id': find_re(string, r'viewMovie\?id=(.*?)&'),`
			`'title': find_re(string, r'<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`})`
			`return list`
			`except:`
			`return list`

			`class ItunesAlbum:`
			`def __init__(self, id = '', title = '', artist = ''):`
			`self.id = id`
			`self.title = title`
			`self.artist = artist`
			`if not id:`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`self.id = self.get_id()`
add ox.web to this repos 2010-07-07 23:25:57 +00:00
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def get_id(self):`
			`url = compose_url('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`xml = read_url(url, headers = ITUNES_HEADERS)`
escape strings 2024-09-11 21:52:01 +00:00			`id = find_re(xml, r'viewAlbum\?id=(.*?)&')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return id`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def get_data(self):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`data = {'id': self.id}`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`url = compose_url('viewAlbum', {'id': self.id})`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`xml = read_url(url, None, ITUNES_HEADERS)`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['albumName'] = find_re(xml, '<B>(.*?)</B>')`
			`data['artistName'] = find_re(xml, '<b>(.*?)</b>')`
			`data['coverUrl'] = find_re(xml, 'reflection="." url="(.*?)"')`
			`data['genre'] = find_re(xml, 'Genre:(.*?)<')`
			`data['releaseDate'] = find_re(xml, 'Released(.*?)<')`
			`data['review'] = strip_tags(find_re(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`data['tracks'] = []`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`strings = find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`for string in strings:`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['tracks'].append(parse_xml_dict(string))`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['type'] = find_re(xml, '<key>listType</key><string>(.*?)<')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return data`

			`class ItunesMovie:`
			`def __init__(self, id = '', title = '', director = ''):`
			`self.id = id`
			`self.title = title`
			`self.director = director`
			`if not id:`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`self.id = self.get_id()`
add ox.web to this repos 2010-07-07 23:25:57 +00:00
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def get_id(self):`
			`url = compose_url('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`xml = read_url(url, headers = ITUNES_HEADERS)`
escape strings 2024-09-11 21:52:01 +00:00			`id = find_re(xml, r'viewMovie\?id=(.*?)&')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return id`

ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`def get_data(self):`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`data = {'id': self.id}`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`url = compose_url('viewMovie', {'id': self.id})`
net/cache readUrl->read_url / Unicode -> unicode=True format replace all CammelCase with under_score 2012-08-14 13:58:05 +00:00			`xml = read_url(url, None, ITUNES_HEADERS)`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')`
			`f.write(xml)`
			`f.close()`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['actors'] = parse_cast(xml, 'actors')`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`string = find_re(xml, 'Average Rating:(.*?)</HBoxView>')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['directors'] = parse_cast(xml, 'directors')`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['format'] = find_re(xml, 'Format:(.*?)<')`
			`data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))`
			`data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))`
			`data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['producers'] = parse_cast(xml, 'producers')`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['rated'] = find_re(xml, 'Rated(.*?)<')`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['relatedMovies'] = parse_movies(xml, 'related movies')`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['releaseDate'] = find_re(xml, 'Released(.*?)<')`
			`data['runTime'] = find_re(xml, 'Run Time:(.*?)<')`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data['screenwriters'] = parse_cast(xml, 'screenwriters')`
escape strings 2024-09-11 21:52:01 +00:00			`data['soundtrackId'] = find_re(xml, r'viewAlbum\?id=(.*?)&')`
replace all CammelCase with under_score in ox 2012-08-14 14:12:43 +00:00			`data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`return data`

			`if __name__ == '__main__':`
import json/simplejson in one place and use that 2010-07-28 13:08:06 +00:00			`from ox.utils import json`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()`
from __futre__ import print_function 2014-09-30 19:27:26 +00:00			`print(json.dumps(data, sort_keys = True, indent = 4))`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()`
from __futre__ import print_function 2014-09-30 19:27:26 +00:00			`print(json.dumps(data, sort_keys = True, indent = 4))`
add ox.web to this repos 2010-07-07 23:25:57 +00:00			`for v in data['relatedMovies']:`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data = ItunesMovie(id = v['id']).get_data()`
from __futre__ import print_function 2014-09-30 19:27:26 +00:00			`print(json.dumps(data, sort_keys = True, indent = 4))`
ox.web under_score api rewrite 2012-08-15 15:15:40 +00:00			`data = ItunesMovie(id='272960052').get_data()`
from __futre__ import print_function 2014-09-30 19:27:26 +00:00			`print(json.dumps(data, sort_keys = True, indent = 4))`
add ox.web to this repos 2010-07-07 23:25:57 +00:00