depend on ox, install as ox.web, migrate getUrl to readUrl

This commit is contained in:
j 2009-10-12 13:47:43 +02:00
commit 16eeaf8b25
28 changed files with 169 additions and 172 deletions

View file

@ -1,9 +0,0 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
__version__ = '0.1.0'
import imdb
import wikipedia
import google
import piratecinema
import oxdb

View file

@ -1,78 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode
def getId(url):
    """Extract the allmovie work id (the second-to-last path segment) from a URL."""
    segments = url.split("/")
    return segments[-2]
def getData(id):
    '''
    Scrape the allmovie work page(s) for *id* and return a dict of metadata.

    >>> getData('129689')['cast'][1][1]
    u'Marianne'
    >>> getData('129689')['credits'][0][0]
    u'Jean-Luc Godard'
    >>> getData('129689')['posters'][0]
    u'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg'
    >>> getData('129689')['rating']
    u'4.5'
    '''
    data = {
        "url": getUrl(id)
    }
    # main work page: most fields are parsed out of this one document
    html = getUrlUnicode(data["url"])
    data['aka'] = parseList(html, 'AKA')
    data['category'] = findRe(html, 'http://allmovie.com/explore/category/.*?">(.*?)</a>')
    data['countries'] = parseList(html, 'Countries')
    data['director'] = parseEntry(html, 'Director')
    data['genres'] = parseList(html, 'Genres')
    data['keywords'] = parseList(html, 'Keywords')
    data['posters'] = [findRe(html, '<img src="(http://image\..*?)"')]
    data['produced'] = parseList(html, 'Produced by')
    data['rating'] = findRe(html, 'Stars" title="(.*?) Stars"')
    data['released'] = parseEntry(html, 'Released by')
    # keep only the leading date portion, formatted with dashes
    data['releasedate'] = parseEntry(html, 'Release')[0:10].replace(' ', '-')
    data['runtime'] = findRe(html, '<td class="formed-sub" style="width: 86px;">(\d+) min.</td>')
    data['set'] = parseEntry(html, 'Set In')
    data['synopsis'] = parseText(html, 'Plot Synopsis')
    data['themes'] = parseList(html, 'Themes')
    data['types'] = parseList(html, 'Types')
    data['year'] = findRe(html, '"http://allmovie.com/explore/year/(.*?)"')
    # cast, credits and review live on separate sub-pages
    html = getUrlUnicode("http://allmovie.com/work/%s/cast" % id)
    data['cast'] = parseTable(html)
    html = getUrlUnicode("http://allmovie.com/work/%s/credits" % id)
    data['credits'] = parseTable(html)
    html = getUrlUnicode("http://allmovie.com/work/%s/review" % id)
    data['review'] = parseText(html, 'Review')
    return data
def getUrl(id):
    """Return the canonical allmovie work-page URL for *id*."""
    return "http://allmovie.com/work/" + ("%s" % id) + "/"
def parseEntry(html, title):
    """Return the stripped plain text of the table section labelled *title*."""
    section = findRe(html, '<span>%s</span>(.*?)</table>' % title)
    return stripTags(section).strip()
def parseList(html, title):
    """Return the stripped text of every <li> item in the section labelled *title*.

    Returns [] when the section is missing (findRe then yields '' and the
    <li> regex matches nothing).  Replaces the old map/lambda chain with a
    list comprehension; behavior is unchanged.
    """
    section = findRe(html, '<span>%s</span>(.*?)</table>' % title)
    return [stripTags(item) for item in re.compile('<li>(.*?)</li>', re.DOTALL).findall(section)]
def parseTable(html):
    # Parse the results table into a list of rows, each row a list of cell
    # texts.  Rows are split on </tr> (the trailing chunk after the last row
    # is dropped); cells are split on the '<td width="305">-' marker, then
    # tag-stripped and cleaned of &nbsp; entities.
    return map(
        lambda x: map(
            lambda x: stripTags(x).strip().replace('&nbsp;', ''),
            x.split('<td width="305">-')
        ),
        findRe(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
    )
def parseText(html, title):
    """Return the stripped text of the paragraph following the heading *title*."""
    pattern = '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title
    return stripTags(findRe(html, pattern)).strip()
if __name__ == '__main__':
    # manual smoke test (requires network access)
    print getData('129689')
#    print getData('177524')

View file

@ -1,22 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2009
import os
import simplejson
import oxlib
def get(key):
user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
auth = {}
if os.path.exists(user_auth):
f = open(user_auth, "r")
data = f.read()
f.close()
auth = simplejson.loads(data)
if key in auth:
return auth[key]
print "please add key %s to json file '%s'" % (key, user_auth)
return ""

View file

@ -1,90 +0,0 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import oxlib.cache
from oxlib.cache import getUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe, removeSpecialCharacters
import imdb
def getId(url):
    """Return the criterion film id, i.e. the last path segment of the URL."""
    return url.rsplit("/", 1)[-1]
def getUrl(id):
    """Return the criterion film-page URL for *id*."""
    base = "http://www.criterion.com/films/"
    return base + ("%s" % id)
def getData(id):
    '''
    Scrape the criterion.com film page for *id* and return a metadata dict
    (number, title, director, country, year, synopsis, posters, stills,
    trailers, imdbId).

    >>> getData('1333')['imdbId']
    '0060304'
    >>> getData('236')['posters'][0]
    'http://criterion_production.s3.amazonaws.com/release_images/1586/ThirdManReplace.jpg'
    >>> getData('786')['posters'][0]
    'http://criterion_production.s3.amazonaws.com/product_images/185/343_box_348x490.jpg'
    '''
    data = {
        "url": getUrl(id)
    }
    try:
        html = getUrlUnicode(data["url"])
    except:
        # NOTE(review): bare except -- falls back to the raw (non-unicode)
        # cached fetch when decoding fails; presumably a charset workaround
        html = oxlib.cache.getUrl(data["url"])
    data["number"] = findRe(html, "<p class=\"spinenumber\">(.*?)</p>")
    data["title"] = findRe(html, "<h2 class=\"movietitle\">(.*?)</h2>")
    data["director"] = findRe(html, "<h2 class=\"director\">(.*?)</h2>")
    # the first two bold paragraphs hold country and year
    results = re.compile("<p><strong>(.*?)</strong></p>").findall(html)
    data["country"] = results[0]
    data["year"] = results[1]
    result = findRe(html, "<div class=\"synopsis contentbox lightgray\">(.*?)</div>")
    data["synopsis"] = findRe(result, "<p>(.*?)</p>")
    result = findRe(html, "<div class=\"editioninfo\">(.*?)</div>")
    # Blu-Ray / Essential Art House editions list the DVD edition second
    if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
        result = re.compile("<div class=\"editioninfo\">(.*?)</div>", re.DOTALL).findall(html)[1]
    result = findRe(result, "<a href=\"(.*?)\">")
    if not "/boxsets/" in result:
        data["posters"] = [result]
    else:
        # film is only sold in a boxset: fetch the set page and pull the
        # film's own thumbnail, upscaled by dropping the _w100 suffix
        html_ = getUrlUnicode(result)
        result = findRe(html_, "<a href=\"http://www.criterion.com/films/%s\">(.*?)</a>" % id)
        result = findRe(result, "src=\"(.*?)\"")
        data["posters"] = [result.replace("_w100", "")]
    result = findRe(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
    if result:
        data["stills"] = [result]
        data["trailers"] = []
    else:
        # no static still: the page embeds a player with thumbnail/video vars
        data["stills"] = [findRe(html, "\"thumbnailURL\", \"(.*?)\"")]
        data["trailers"] = [findRe(html, "\"videoURL\", \"(.*?)\"")]
    data['imdbId'] = imdb.getMovieId(data['title'], data['director'], data['year'])
    return data
def getIds():
    """Return all criterion film ids as sorted, de-duplicated strings.

    Crawls every page of the DVD library listing (last page first, matching
    the original order).  The map/lambda chains were replaced by
    comprehensions and the page count now uses a negative index; behavior is
    unchanged.
    """
    ids = []
    html = getUrlUnicode("http://www.criterion.com/library/dvd")
    # the second-to-last "page=N" link on the listing is the highest page
    results = re.compile("page=(.*?)\"").findall(html)
    pages = int(results[-2])
    for page in range(pages, 0, -1):
        ids.extend(getIdsByPage(page))
    return [str(i) for i in sorted(set(int(i) for i in ids))]
def getIdsByPage(page):
    """Return the set of film ids found on one page of the DVD listing.

    Boxset links on the page are followed and their member films included.
    Cleanup: the original reused the names `results`/`result` for three
    different lists (including reassigning the list being iterated), which
    was needlessly confusing; the logic itself is unchanged.
    """
    ids = []
    html = getUrlUnicode("http://www.criterion.com/library/dvd?page=%s" % page)
    ids.extend(re.compile("films/(.*?)\"").findall(html))
    for boxset in re.compile("boxsets/(.*?)\"").findall(html):
        boxset_html = getUrlUnicode("http://www.criterion.com/boxsets/" + boxset)
        ids.extend(re.compile("films/(.*?)\"").findall(boxset_html))
    return set(ids)
if __name__ == '__main__':
    # manual smoke test: crawls every listing page (slow, needs network)
    print getIds()

View file

@ -1,22 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import unquote
from oxlib.cache import getUrl
def getVideoUrl(url):
    '''
    Return the direct .flv URL for a dailymotion video page, or '' when no
    video variable can be found in the page source.

    Example result (key parameter stripped):
    http://www.dailymotion.com/get/16/320x240/flv/6191379.flv
    '''
    page = getUrl(url)
    matches = re.compile('''video", "(.*?)"''').findall(page)
    if matches:
        # only the first candidate is used; drop the @@-suffixed variants
        path = unquote(matches[0]).split('@@')[0]
        return "http://www.dailymotion.com" + path
    return ''

View file

@ -1,49 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode
import google
def getShowUrl(title):
    '''
    Search Epguide Url for Show via Show Title.
    Use Google to search the url, this is also done on Epguide.
    '''
    # google.find returns (title, url, description) triples; only the first
    # result is requested
    for (name, url, desc) in google.find('allintitle: site:epguides.com %s' % title, 1):
        if url.startswith('http://epguides.com'):
            # NOTE(review): *title* is used as a regex pattern here, so
            # titles containing regex metacharacters change the match --
            # confirm this is intended
            if re.search(title, name):
                return url
    return None
def getShowData(url):
    # Scrape an epguides.com show page into {'title', 'imdb', 'episodes'};
    # episodes maps 'SxxEyy' -> {prod code, air date, url, title}.
    data = getUrlUnicode(url)
    r = {}
    r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>'))
    r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
    r['episodes'] = {}
    #1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear
    for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
        air_date = episode[3].strip()
        #'22 Sep 04' -> 2004-09-22
        try:
            air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
        except:
            # unparsable dates are kept verbatim (best effort)
            pass
        # episode[1] looks like '1- 1': season before the dash, number after
        s = episode[1].split('-')[0].strip()
        e = episode[1].split('-')[-1].strip()
        try:
            r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
                'prod code': episode[2],
                'air date': air_date,
                'url': episode[4],
                'title':episode[5],
            }
        except:
            # NOTE(review): bare except keeps one bad row from aborting the
            # whole show; the failure is only logged
            print "oxweb.epguides failed,", url
    return r

View file

@ -1,57 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
import urllib
import urllib2
import weakref
import threading
import Queue
import simplejson
import oxlib
from oxlib import stripTags
'''
usage:
import google
google.find(query)
for result in google.find(query): result
result is title, url, description
google.find(query, max_results)
FIXME: how to search deeper than the first page?
'''
DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60
def getUrl(url, data=None, headers=oxlib.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
    # Thin wrapper around the oxlib cache with a one-day default timeout.
    # NOTE: the headers default is bound once at import time.
    return oxlib.cache.getUrl(url, data, headers, timeout)
def quote_plus(s):
    """URL-encode a (unicode) query string for use in a google search URL."""
    encoded = s.encode('utf-8')
    return urllib.quote_plus(encoded)
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
    # Scrape google's HTML result page and return up to max_results
    # (title, url, description) tuples, all tag-stripped except the url.
    url = "http://www.google.com/search?q=%s" % quote_plus(query)
    data = getUrl(url, timeout=timeout)
    # result link, then its snippet up to the next marker
    link_re = r'<a href="(?P<url>[^"]*?)" class=l.*?>(?P<name>.*?)</a>' + \
              r'.*?(?:<br>|<table.*?>)' + \
              r'(?P<desc>.*?)' + '(?:<font color=#008000>|<a)'
    results = []
    for match in re.compile(link_re, re.DOTALL).finditer(data):
        (name, url, desc) = match.group('name', 'url', 'desc')
        results.append((stripTags(name), url, stripTags(desc)))
    if len(results) > max_results:
        results = results[:max_results]
    return results
def _find(query):
    """Query the google AJAX search API and return its raw result list.

    Fix: this previously called getUrlUnicode, a name that is neither
    defined nor imported in this module (NameError at runtime); it now uses
    the module's own getUrl wrapper, whose result simplejson can parse.
    """
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s' % quote_plus(query)
    results = simplejson.loads(getUrl(url))['responseData']['results']
    return results

View file

@ -1,756 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import urllib2
from urllib import quote, unquote
import re
import os
import time
from BeautifulSoup import BeautifulSoup
import chardet
import oxlib
from oxlib import stripTags, decodeHtml, findRe, findString
import oxlib.cache
from oxlib.normalize import normalizeTitle, normalizeImdbId
from oxlib import *
import google
'''
never timeout imdb data, to update cache remove data from cache folder
'''
def getUrlUnicode(url, data=None, headers=oxlib.cache.DEFAULT_HEADERS, timeout=-1):
    # timeout=-1 caches imdb pages forever (see the module note above);
    # delete the cache entry to force a refetch
    return oxlib.cache.getUrlUnicode(url, data, headers, timeout)
'''
check if result is valid while updating
def validate(result, header):
return header['status'] == u'200'
try:
d = oxlib.cache.getUrlUnicode(url, data, headers, timeout=0, valid=validate)
except oxlib.cache.InvalidResult, e:
print e.headers
'''
def getMovieId(title, director='', year=''):
    '''
    Find the 7-digit imdb id for a title via a google site search.
    Returns '' when no imdb title link appears in the first 3 results.

    >>> getMovieId('The Matrix')
    '0133093'
    '''
    if year:
        title = "%s (%s)" % (title, year)
    if director:
        query = 'site:imdb.com %s "%s"' % (director, title)
    else:
        query = 'site:imdb.com "%s"' % title
    for (name, url, desc) in google.find(query, 3, timeout=-1):
        if url.startswith('http://www.imdb.com/title/tt'):
            # slice out the 7 digits after '.../title/tt'
            return url[28:35]
    return ''
def getMovieData(imdbId):
    """Parse the imdb title page via the legacy IMDb class and return its dict."""
    parser = IMDb(imdbId)
    return parser.parse()
# internal functions below
def getUrlBase(imdbId):
    """Return the base URL of an imdb title page, e.g. .../title/tt0133093/."""
    url = "http://www.imdb.com/title/tt%s/" % imdbId
    return url
def getRawMovieData(imdbId):
    # Aggregate every scraper in this module into one big dict.
    # Each call fetches at least one sub-page, so this is slow on a cold cache.
    imdbId = normalizeImdbId(imdbId)
    data = getMovieInfo(imdbId)
    data['credits'] = getMovieCredits(imdbId)
    data['poster'] = getMoviePoster(imdbId)
    data['company credits'] = getMovieCompanyCredits(imdbId)
    data['filming locations'] = getMovieLocations(imdbId)
    data['movie connections'] = getMovieConnections(imdbId)
    data['external reviews'] = getMovieExternalReviews(imdbId)
    data['trivia'] = getMovieTrivia(imdbId)
    data['keywords'] = getMovieKeywords(imdbId)
    data['media'] = {}
    data['media']['images'] = getMovieImages(imdbId)
    data['media']['trailers'] = getMovieTrailers(imdbId)
    data['plotsummary'] = getMoviePlot(imdbId)
    data['release dates'] = getMovieReleaseDates(imdbId)
    data['release date'] = getMovieReleaseDate(imdbId)
    return data
def getMovieInfo(imdbId):
    # Parse the main imdb title page into a dict: the <h5> info boxes,
    # cleaned title/year (with TV-series handling), rating and votes.
    # NOTE(review): indentation was reconstructed from a whitespace-stripped
    # diff; nesting of the series/del block should be confirmed.
    data = getUrlUnicode(getUrlBase(imdbId))
    info = dict()
    info['poster'] = findRe(data, 'name="poster".*?<img .*?src="(.*?)"')
    if info['poster'] and '_V' in info['poster']:
        # strip imdb's resize suffix to get the full-size image
        info['poster'] = "%s.jpg" % info['poster'].split('._V')[0]
    # each "<h5>Label:</h5>value" box becomes one entry
    for i in re.compile('<h5>(.*?):</h5>(.*?)<div class="info"', re.DOTALL).findall(data):
        title = stripTags(i[0]).strip().lower()
        txt = stripTags(i[1]).strip()
        def cleanUp(k):
            # decode entities, normalize nbsp, drop a trailing 'more' link
            k = decodeHtml(k).replace(u'\xa0', ' ').strip()
            if k.endswith('more'): k = k[:-len('more')].strip()
            return k
        txt = cleanUp(txt)
        # multi-valued fields are split on '|' or ', '; a few fields are
        # forced to lists even with a single value
        if title not in ('plot', 'trivia', 'filming locations', 'mpaa', 'tagline', 'original air date'):
            if '|' in txt:
                txt = [cleanUp(k) for k in txt.split('|')]
            elif ', ' in txt:
                txt = [cleanUp(k) for k in txt.split(', ')]
            elif title in ('country', 'language', 'genre'):
                txt = [cleanUp(txt), ]
        if title == 'tv series':
            info['series_imdb'] = findRe(i[1], 'tt(\d{7})')
        if title == 'original air date':
            # second line holds "(Season x, Episode y)" style info
            info['series_episode_info'] = txt.split('\n')[-1].strip()
            txt = txt.split('\n')[0].strip()
        if not title.startswith('moviemeter'):
            info[title] = txt
    # drop noisy boxes we never use
    for key in ('user comments', 'writers (wga)', 'plot keywords'):
        if key in info:
            del info[key]
    if 'release date' in info:
        if isinstance(info['release date'], list):
            info['release date'] = info['release date'][0]
        info['release date'] = info['release date'].split('\n')[0]
    if 'plot' in info:
        info['plot'] = info['plot'].split('| add synopsis')[0].strip()
        info['plot'] = info['plot'].split('| full synopsis')[0].strip()
        if info['plot'] in ('add synopsis', 'full synopsis'):
            info['plot'] = ''
    #get Title
    title = ''
    year = ''
    html_title = findRe(data, '<div id="tn15title">(.*?)</div>')
    if not html_title:
        html_title = findRe(data, '<title>(.*?)</title>')
    else:
        html_title = html_title.split('<span class="pro-link">')[0]
    if html_title:
        html_title = html_title.replace('<br />', ' ').replace('  ', ' ')
        title = stripTags(html_title)
        title = decodeHtml(title)
        year = findRe(title, '\((\d{4})\)')
        if not year:
            year = findRe(title, '\((\d{4})')
        # remove the "(1999)" / "(1999/II)" marker and format tags
        _y = findRe(title, r'(\([0-9\?]{4}[/IVXLCDM]*?\))')
        if _y:
            title = title.replace(_y, '')
        for t in ('TV series', 'TV-Series', 'TV mini-series', '(mini)', '(VG)', '(V)', '(TV)'):
            title = title.replace(t, '')
        title = title.strip()
        if title.find(u'\xa0') > -1:
            title = title[:title.find(u'\xa0')].strip()
        if title.startswith('"') and title.endswith('"'):
            title = title[1:-1]
    info['title'] = normalizeTitle(title)
    info['year'] = year
    #Series
    # imdb renders episodes as '"Show Title" Episode Title'
    if title.startswith('"') and title.find('"', 1) > 0 and \
       title.find('"', 1) == title.rfind('"'):
        episode_title = title[title.rfind('"')+1:]
        episode_title = re.sub("\?{4}", "", episode_title).strip()
        episode_title = re.sub("\d{4}", "", episode_title).strip()
        if episode_title == '-': episode_title = ''
        title = normalizeTitle(title[1:title.rfind('"')])
        if episode_title:
            info['episode title'] = episode_title
            info['series title'] = title
            info['title'] = "%s: %s" % (title, episode_title)
        else:
            info['title'] = title
        se = re.compile("Season (\d*), Episode (\d*)\)").findall(info.get('series_episode_info', ''))
        if se:
            info['season'] = int(se[0][0])
            info['episode'] = int(se[0][1])
            info['title'] = "%s (S%02dE%02d) %s" % (
                info['series title'], info['season'], info['episode'], info['episode title'])
        info['title'] = info['title'].strip()
        del info['series_episode_info']
    #Rating
    rating = findRe(data, '<b>([\d\.]*?)/10</b>')
    if rating:
        info['rating'] = float(rating)
    else:
        info['rating'] = -1
    #Votes
    info['votes'] = -1
    if "user rating" in info:
        votes = findRe(info['user rating'], '([\d,]*?) votes')
        if votes:
            info['votes'] = int(votes.replace(',', ''))
    return info
def getMovieRuntimeSeconds(imdbId):
    # Return the runtime in seconds: parses "N min" (x60) or "N sec" from
    # the first runtime entry; 0 if unparsable, -1 if no runtime at all.
    info = getMovieInfo(imdbId)
    if 'runtime' in info:
        value = info['runtime'][0]
        parsed_value = findRe(value, '(.*?) min')
        parsed_value = findRe(parsed_value, '([0-9]+)')
        if not parsed_value:
            parsed_value = findRe(value, '(.*?) sec')
            parsed_value = findRe(parsed_value, '([0-9]+)')
            if not parsed_value:
                parsed_value = 0
            else:
                parsed_value = int(parsed_value)
        else:
            parsed_value = int(parsed_value) * 60
    else:
        parsed_value = -1
    return parsed_value
def getMoviePoster(imdbId):
    """Return the poster URL parsed from the main title page ('' when absent)."""
    return getMovieInfo(imdbId)['poster']
def getMovieYear(imdbId):
    '''
    Return the release year parsed from the title page (as a string).

    >>> getMovieYear('0315404')
    u'1964'
    >>> getMovieYear('0734840')
    u'1990'
    >>> getMovieYear('0815352')
    u'1964'
    '''
    info = getMovieInfo(imdbId)
    return info['year']
def getMovieTitle(imdbId):
    '''
    Return the normalized title; series episodes include show and SxxEyy.

    >>> getMovieTitle('0306414')
    u'The Wire'
    >>> getMovieTitle('0734840')
    u'Twin Peaks (S01E02) Episode #1.2'
    >>> getMovieTitle('0734840')
    u'Twin Peaks (S01E02) Episode #1.2'
    >>> getMovieTitle('0749451')
    u'The Wire (S01E01) The Target'
    '''
    info = getMovieInfo(imdbId)
    return info['title']
def getMovieAKATitles(imdbId):
    '''
    Return (aka title, country) pairs from the releaseinfo page.

    >>> getMovieAKATitles('0040980')
    [(u'Frauen der Nacht', u'Germany'),
     (u'Les femmes de la nuit', u'France'),
     (u'Women of the Night', u'(undefined)')]
    '''
    url = "%sreleaseinfo" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    titles = findRe(data, 'name="akas".*?<table.*?>(.*?)</table>')
    titles = re.compile("td>(.*?)</td>\n\n<td>(.*)</td>").findall(titles)
    return titles
def creditList(data, section=None):
    # Parse one fullcredits section into [name, role] pairs.  Cast rows use
    # different table markup than the other credit sections.
    if section == 'cast':
        credits_ = re.compile('''<tr .*?<td class="nm">(.*?)</td><td class="ddd">.*?</td><td class="char">(.*?)</td></tr>''').findall(data)
    else:
        credits_ = re.compile('''<tr>.*?<td valign="top">(.*?)</td><td.*?</td><td valign="top">(.*?)</td></tr>''').findall(data)
    credits = []
    for c_ in credits_:
        c = [decodeHtml(c_[0]).strip(), decodeHtml(c_[1]).strip()]
        if section == 'writers':
            # strip markup and the parenthesised role qualifiers
            c[1] = c[1].replace('<br>', '').strip().replace(')', '').replace('(', '')
            if c[1].endswith(' and'): c[1] = c[1][:-4]
        credits.append(c)
    return credits
def getMovieCredits(imdbId):
    # Split the fullcredits page into its <h5> sections (directors, cast,
    # writers, ...) and parse each with creditList.
    credits = dict()
    url = "%sfullcredits" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    groups = data.split('<h5>')
    for g in groups:
        # the section's anchor name doubles as its key
        section = re.compile('''name="(.*?)".*? href="/Glossary''').findall(g)
        if section:
            credits[section[0]] = creditList(g, section[0])
    return credits
def getMovieTrailers(imdbId):
    # Scrape the trailers subpage; every gallery link leads to a player
    # iframe whose flash variable "file" holds the actual .flv URL.
    url = "%strailers" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    soup = BeautifulSoup(data)
    videos = soup('div', {'class': "video-gallery"})
    trailers = []
    if videos:
        for a in videos[0]('a'):
            title = stripTags(unicode(a)).strip()
            url = 'http://www.imdb.com' + a['href']
            videoId = findRe(url, '/(vi\d*?)/')
            iframeUrl = "http://www.imdb.com/video/trailer/%s/player" % videoId
            # one extra fetch per trailer to resolve the flv
            iframe = getUrlUnicode(iframeUrl)
            videoUrl = unquote(findRe(iframe, 'addVariable\("file", "(.*?)"'))
            trailers.append({'title': title, 'url': url, 'iframe': iframeUrl, 'flv': videoUrl})
    return trailers
def getMovieQuotes(imdbId):
    # Return (speaker, quote) pairs; findString narrows parsing to the part
    # of the quotes page after the first quote anchor.
    url = "%squotes" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    quotes = re.compile('<b>(.*?)</b>:(.*?)<br>', re.DOTALL).findall(findString(data, '<a name="q'))
    quotes = [(q[0].strip(), q[1].strip()) for q in quotes]
    return quotes
def getMoviePlot(imdbId):
    # Return the first plot summary paragraph, without the author credit
    # (which starts at <i>).
    url = "%splotsummary" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    plot = findRe(data, '<p class="plotpar">(.*?)<i>').split('</p>')[0]
    return plot.strip()
def getMovieTechnical(imdbId):
    """Return {spec label: value} parsed from the technical subpage."""
    page = getUrlUnicode("%stechnical" % getUrlBase(imdbId))
    pairs = re.compile('<h5>(.*?)</h5>(.*?)<br/>', re.DOTALL).findall(page)
    return dict((label.strip(), value.strip()) for label, value in pairs)
def getMovieCompanyCredits(imdbId):
    """Return {section: [company, ...]} parsed from the companycredits subpage."""
    data = getUrlUnicode("%scompanycredits" % getUrlBase(imdbId))
    results = {}
    company_re = re.compile('<li>(.*?)</li>')
    for field, companies in re.compile('<h2>(.*?)</h2><ul>(.*?)</ul>').findall(data):
        results[field.strip()] = company_re.findall(companies)
    return results
def getMovieLocations(imdbId):
    """Return the list of filming locations from the locations subpage."""
    page = getUrlUnicode("%slocations" % getUrlBase(imdbId))
    pattern = re.compile('<dt><a href="/List.*?>(.*?)</a></dt>')
    return pattern.findall(page)
def getMovieImages(imdbId, keys=('still_frame', 'poster', 'product')):
    # Return {key: {full-size image url: caption}} from the mediaindex
    # pages.  Thumbnails are upscaled by stripping imdb's '._V...' suffix.
    photos = {}
    for key in keys:
        url = "%smediaindex?refine=%s" % (getUrlBase(imdbId), key)
        data = getUrlUnicode(url)
        photos[key] = {}
        for s in re.compile('''<img alt="(.*?)".*?src="(http://ia.media-imdb.com/.*?.jpg)''').findall(data):
            img = "%s.jpg" % s[1].split('._V')[0]
            title = s[0]
            if key == 'still_frame':
                # skip cropped variants of stills
                if not "_CR0" in s[1]:
                    photos[key][img] = title
            else:
                photos[key][img] = title
    return photos
def getMovieStills(imdbId):
    """Convenience wrapper: only the 'still_frame' images of a title."""
    images = getMovieImages(imdbId, ['still_frame'])
    return images['still_frame']
def getMoviePosters(imdbId):
    """Return {poster url: caption}, adding the title page's main poster."""
    posters = getMovieImages(imdbId, ['poster'])['poster']
    main = getMoviePoster(imdbId)
    if main:
        posters[main] = 'main poster'
    return posters
def getMovieTrivia(imdbId):
    # Return the list of trivia items (one per <li>), entity-decoded,
    # with fancy quotes normalized and trailing <br><br> removed.
    url = "%strivia" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    trivia = re.compile('<li>(.*?)</li>').findall(data)
    def clean(t):
        t = decodeHtml(t)
        t = t.replace(u'”', '"')
        if t.endswith('<br><br>'):
            t = t[:-8]
        return t.strip()
    trivia = [clean(t) for t in trivia]
    return trivia
def getMovieConnections(imdbId):
    # Return {connection type (e.g. u'Follows'): [connected imdb ids]}.
    url = "%smovieconnections" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    connections = {}
    for c in re.compile('''<h5>(.*?)</h5>(.*?)\n\n''', re.DOTALL).findall(data):
        connections[unicode(c[0])] = re.compile('''<a href="/title/tt(\d{7})/">''').findall(c[1])
    return connections
def getMovieKeywords(imdbId):
    """Return the list of plot keywords, entity-decoded and nbsp-normalized."""
    page = getUrlUnicode("%skeywords" % getUrlBase(imdbId))
    raw = re.compile('''<a.*?href="/keyword.*?>(.*?)</a>''').findall(page)
    return [decodeHtml(kw).replace(u'\xa0', ' ') for kw in raw]
def getMovieExternalReviews(imdbId):
    """Return {review url: source label} from the externalreviews subpage."""
    page = getUrlUnicode("%sexternalreviews" % getUrlBase(imdbId))
    pairs = re.compile('<li><a href="(.*?)">(.*?)</a></li>').findall(page)
    return dict(pairs)
def getMovieReleaseDate(imdbId):
    """Return the earliest release date of a title, or None when there are none."""
    earliest = None
    for entry in getMovieReleaseDates(imdbId):
        candidate = entry[1]
        # a falsy current value is always replaced, matching the original logic
        if not earliest or candidate < earliest:
            earliest = candidate
    return earliest
def _parseDate(d):
'''
>>>_parseDate('3 March 1972')
'1972-03-03'
'''
try:
parsed_date = time.strptime(d, "%d %B %Y")
parsed_date = '%s-%02d-%02d' % (parsed_date.tm_year, parsed_date.tm_mon, parsed_date.tm_mday)
return parsed_date
except:
try:
parsed_date = time.strptime(d, "%B %Y")
parsed_date = '%s-%02d-01' % (parsed_date.tm_year, parsed_date.tm_mon)
return parsed_date
except:
pass
try:
parsed_date = time.strptime(d, "%Y")
parsed_date = '%s-01-01' % (parsed_date.tm_year)
return parsed_date
except:
pass
return d
def getMovieReleaseDates(imdbId):
    # Return (country, iso date, comment) triples from the releaseinfo page.
    url = "%sreleaseinfo" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    releasedates = []
    regexp = '''<tr><td>(.*?)</td>.*?<td align="right">(.*?)</td>.*?<td>(.*?)</td></tr>'''
    for r in re.compile(regexp, re.DOTALL).findall(data):
        r_ = (stripTags(r[0]).strip(),
              _parseDate(stripTags(r[1]).strip()),
              decodeHtml(stripTags(r[2]).strip()))
        releasedates.append(r_)
    return releasedates
def getMovieBusinessSum(imdbId):
    # Summarize the business page into {'budget', 'gross', 'profit'} ints.
    # intValue comes from the module's `from oxlib import *`.
    business = getMovieBusiness(imdbId)
    b_ = {'budget': 0, 'gross': 0, 'profit': 0}
    if 'budget' in business:
        #b_['budget'] = sum([int(intValue(i.replace(',', ''))) for i in business['budget']])
        # prefer dollar amounts; fall back to whatever currency is listed
        budget = filter(lambda x: x.startswith('$'), business['budget'])
        if not budget:
            budget = business['budget']
        b_['budget'] = int(intValue(budget[0].replace(',', '')))
    if 'gross' in business:
        b_['gross'] = int(intValue(business['gross'][0].replace(',', '')))
        #b_['gross'] = sum([int(intValue(i.replace(',', ''))) for i in business['gross']])
        #if 'weekend gross' in business:
        #    b_['gross'] += sum([int(intValue(i.replace(',', ''))) for i in business['weekend gross']])
    if b_['budget'] and b_['gross']:
        b_['profit'] = b_['gross'] - b_['budget']
    return b_
def getMovieFlimingDates(imdbId):
    """Return the first 'filming dates' entry from the business page, or ''.

    (sic: 'Fliming' typo kept -- the name is this function's public API.)
    """
    business = getMovieBusiness(imdbId)
    dates = business.get('filming dates')
    if dates:
        return dates[0]
    return ''
def getMovieBusiness(imdbId):
    # Return {section (lowercased): [line, ...]} from the business page.
    url = "%sbusiness" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    business = {}
    for r in re.compile('''<h5>(.*?)</h5>(.*?)<br/>.<br/>''', re.DOTALL).findall(data):
        key = stripTags(r[0]).strip().lower()
        value = [decodeHtml(stripTags(b).strip()) for b in r[1].split('<br/>')]
        business[key] = value
    return business
def getMovieEpisodes(imdbId):
    # Scrape a series' episodes page into
    # {'SxxEyy': {imdb, title, description, date}}.
    url = "%sepisodes" % getUrlBase(imdbId)
    data = getUrlUnicode(url)
    episodes = {}
    regexp = r'''<h4>Season (.*?), Episode (.*?): <a href="/title/tt(.*?)/">(.*?)</a></h4>(.*?)</b><br>(.*?)<br/>'''
    for r in re.compile(regexp, re.DOTALL).findall(data):
        try:
            episode = "S%02dE%02d" % (int(r[0]), int(r[1]))
            episodes[episode] = {}
            episodes[episode]['imdb'] = r[2]
            episodes[episode]['title'] = r[3].strip()
            # placeholder titles like 'Episode #1.2' are blanked
            if episodes[episode]['title'].startswith('Episode #%d' % int(r[0])):
                episodes[episode]['title'] = u''
            description = decodeHtml(r[5])
            description = stripTags(description.split('Next US airings:')[0])
            episodes[episode]['description'] = description.strip()
            episodes[episode]['date'] = ''
            try:
                # '12 April 1990' -> '1990-04-12'; left empty on failure
                d = stripTags(r[4])
                d = d.replace('Original Air Date: ', '')
                d = time.strftime("%Y-%m-%d", time.strptime(d, '%d %B %Y'))
                episodes[episode]['date'] = d
            except:
                pass
        except:
            # NOTE(review): bare except keeps one malformed row from
            # aborting the whole listing; the traceback is printed
            import traceback
            print traceback.print_exc()
            pass
    return episodes
'''the old code below'''
class IMDb:
    # Legacy parser kept for getMovieData(); newer code should use the
    # module-level getMovie* helpers directly.
    def __init__(self, imdbId):
        # imdb: 7-digit title id; pageUrl: the title's base URL
        self.imdb = imdbId
        self.pageUrl = getUrlBase(imdbId)
    def getPage(self):
        # cached fetch of the main title page
        return getUrlUnicode(self.pageUrl)
    def parse_raw_value(self, key, value):
        # Convert one raw info-box HTML snippet into its parsed value;
        # falls through to printing and returning the raw value for
        # unknown keys.
        if key in ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline'):
            value = stripTags(value).strip()
        if key == 'runtime':
            parsed_value = getMovieRuntimeSeconds(self.imdb)
        elif key in ('country', 'language'):
            # values are either ' / ' or ' | ' separated
            parsed_value = value.split(' / ')
            if len(parsed_value) == 1:
                parsed_value = parsed_value[0].split(' | ')
            parsed_value = [v.strip() for v in parsed_value]
        elif key == 'genre':
            parsed_value = value.replace('more', '').strip().split(' / ')
            if len(parsed_value) == 1:
                parsed_value = parsed_value[0].split(' | ')
            parsed_value = [v.strip() for v in parsed_value]
        elif key == 'tagline':
            parsed_value = value.replace('more', '').strip()
        elif key == 'plot_outline':
            parsed_value = value.replace('(view trailer)', '').strip()
            if parsed_value.endswith('more'):
                parsed_value = parsed_value[:-4].strip()
        elif key == 'tv_series':
            # value holds a link to the parent series; keep only its id
            m = re.compile('<a href="/title/tt(.*?)/">(.*?)</a>').findall(value)
            if m:
                parsed_value = m[0][0]
            else:
                parsed_value = ''
        elif key == 'also_known_as':
            # pick the international English (or USA) aka title and strip
            # the director's name if it leaked into the value
            parsed_value = ''
            m = re.compile('(.*) \(International: English title').findall(value)
            if m:
                parsed_value = m[0]
            else:
                m = re.compile('(.*) \(USA').findall(value)
                if m:
                    parsed_value = m[0]
            parsed_value = parsed_value.split('<br />')[-1].split('(')[0]
            director = self.getCredits().get('director', None)
            if director:
                director = director[0]
                parsed_value = parsed_value.replace(director, '')
                if parsed_value.startswith("'s"):
                    parsed_value = parsed_value[2:].strip()
            parsed_value = decodeHtml(parsed_value.strip())
        else:
            print value
            parsed_value = value
        return parsed_value
    def parseYear(self):
        # Year from the page's title block ('(1999)' or '(1999/...)');
        # '' when neither pattern matches.
        year = ''
        data = self.getPage()
        soup = BeautifulSoup(data)
        html_title = soup('div', {'id': 'tn15title'})
        if not html_title:
            html_title = soup('title')
        if html_title:
            html_title = unicode(html_title[0])
            html_title = stripTags(html_title)
            year = re.compile('\((\d{4})\)').findall(html_title)
            if not year:
                year = re.compile('\((\d{4})/').findall(html_title)
            if year:
                year = year[0]
            else: year = ''
        return year
    def parse(self):
        # Assemble the full legacy metadata dict; hits many sub-pages.
        data = self.getPage()
        IMDbDict = {}
        info = getMovieInfo(self.imdb)
        #Poster
        IMDbDict['poster'] = getMoviePoster(self.imdb)
        if not IMDbDict['poster']:
            IMDbDict['poster'] = 'http://i.imdb.com/Heads/npa.gif'
        #Title, Year
        IMDbDict['year'] = self.parseYear()
        IMDbDict['title'] = getMovieTitle(self.imdb)
        #Rating (scaled: 7.4/10 -> 7400)
        m = re.compile('<b>(.*?)/10</b>', re.IGNORECASE).search(data)
        if m:
            IMDbDict['rating'] = int(float(m.group(1)) * 1000)
        else:
            IMDbDict['rating'] = -1
        #Votes
        IMDbDict['votes'] = info['votes']
        data = data.replace('\n', ' ')
        #some values
        keys = ('runtime', 'language', 'genre', 'country', 'tagline', 'plot_outline', 'tv_series', 'also_known_as')
        for key in keys:
            IMDbDict[key] = ''
        IMDbDict['runtime'] = 0
        soup = BeautifulSoup(data)
        for info in soup('div', {'class': 'info'}):
            key = unicode(info).split('</h5>')[0].split('<h5>')
            if len(key) > 1:
                raw_value = unicode(info).split('</h5>')[1]
                key = key[1][:-1].lower().replace(' ', '_')
                if key in keys:
                    IMDbDict[key] = self.parse_raw_value(key, raw_value)
        IMDbDict['title_english'] = IMDbDict.pop('also_known_as', IMDbDict['title'])
        #is episode
        IMDbDict['episode_of'] = IMDbDict.pop('tv_series', '')
        IMDbDict['episodes'] = getMovieEpisodes(self.imdb)
        if IMDbDict['episodes']:
            IMDbDict['tvshow'] = True
        else:
            IMDbDict['tvshow'] = False
        IMDbDict['credits'] = self.getCredits()
        IMDbDict['plot'] = getMoviePlot(self.imdb)
        IMDbDict['keywords'] = getMovieKeywords(self.imdb)
        IMDbDict['trivia'] = getMovieTrivia(self.imdb)
        IMDbDict['connections'] = getMovieConnections(self.imdb)
        IMDbDict['locations'] = getMovieLocations(self.imdb)
        IMDbDict['release_date'] = getMovieReleaseDate(self.imdb)
        IMDbDict['business'] = getMovieBusinessSum(self.imdb)
        IMDbDict['reviews'] = getMovieExternalReviews(self.imdb)
        IMDbDict['stills'] = getMovieStills(self.imdb)
        #IMDbDict['trailer'] = getMovieTrailer(self.imdb)
        self.IMDbDict = IMDbDict
        # episodes inherit country/language from their parent series
        if IMDbDict['episode_of']:
            episode_of = getMovieInfo(IMDbDict['episode_of'])
            for key in ('country', 'language'):
                if not IMDbDict[key]:
                    IMDbDict[key] = episode_of[key]
        return self.IMDbDict
    def getCredits(self):
        # Flatten the raw fullcredits sections into the legacy shape:
        # plain name lists, plus (name, character) tuples for the cast.
        raw_credits = getMovieCredits(self.imdb)
        credits = {}
        def getNames(creditList):
            return [stripTags(decodeHtml(c[0])) for c in creditList]
        credits['director'] = getNames(raw_credits.get('directors', ''))
        credits['writer'] = getNames(raw_credits.get('writers', ''))
        credits['producer'] = getNames(raw_credits.get('producers', ''))
        credits['cinematographer'] = getNames(raw_credits.get('cinematographers', ''))
        credits['editor'] = getNames(raw_credits.get('editors', ''))
        credits['cast'] = [(stripTags(decodeHtml(c[0])), stripTags(decodeHtml(c[1]))) for c in raw_credits.get('cast', [])]
        self.credits = credits
        return self.credits
def guess(title, director=''):
    # Best-effort imdb id lookup for a (possibly filename-derived) title:
    # first via google, then via imdb's own find page, then its aka search.
    # Returns a 7-digit id string or None.
    #FIXME: proper file -> title
    title = title.split('-')[0]
    title = title.split('(')[0]
    title = title.split('.')[0]
    title = title.strip()
    imdb_url = 'http://www.imdb.com/find?q=%s' % quote(title.encode('utf-8'))
    return_url = ''
    # let's first try google
    #i.e. site:imdb.com Michael Stevens Sin
    if director:
        search = 'site:imdb.com %s "%s"' % (director, title)
    else:
        search = 'site:imdb.com "%s"' % title
    for (name, url, desc) in google.find(search, 2):
        if url.startswith('http://www.imdb.com/title/tt'):
            return normalizeImdbId(int(oxlib.intValue(url)))
    # fall back to imdb's find page; it redirects straight to the title
    # page on an exact match
    try:
        req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
        u = urllib2.urlopen(req)
        data = u.read()
        return_url = u.url
        u.close()
    except:
        return None
    if return_url.startswith('http://www.imdb.com/title/tt'):
        return return_url[28:35]
    if data:
        imdb_id = findRe(data.replace('\n', ' '), 'Popular Results.*?<ol><li>.*?<a href="/title/tt(.......)')
        if imdb_id:
            return imdb_id
    # last resort: search including aka titles
    imdb_url = 'http://www.imdb.com/find?q=%s;s=tt;site=aka' % quote(title.encode('utf-8'))
    req = urllib2.Request(imdb_url, None, oxlib.net.DEFAULT_HEADERS)
    u = urllib2.urlopen(req)
    data = u.read()
    return_url = u.url
    u.close()
    if return_url.startswith('http://www.imdb.com/title/tt'):
        return return_url[28:35]
    return None
def getEpisodeData(title, episode, show_url = None):
    '''
    Collect information about an episode.
    Returns dict with title, show, description and episode

    Fix: the show_url branch referenced an undefined name `link`
    (NameError whenever show_url was passed); it now parses show_url.
    The unused `description` local was also removed.
    '''
    episodeData = {
        'title': u'',
        'show': title,
        'description': u'',
        'episode': episode,
    }
    if not show_url:
        imdbid = guess(title)
    else:
        imdbid = "%07d" % int(re.compile('title/tt(\d*)').findall(show_url)[0])
    if imdbid:
        i = IMDb(imdbid).parse()
        episodeData['title'] = i['episodes'][episode]['title']
        episodeData['description'] = i['episodes'][episode]['description']
        episodeData['imdb'] = i['episodes'][episode]['imdb']
    return episodeData
def getPersonData(imdbId):
    """Scrape a person's name and filmography from their imdb page.

    Returns dict with 'name' and 'movies' (section name -> list of
    7-digit imdb title ids).
    """
    imdbId = normalizeImdbId(imdbId)
    url = u'http://www.imdb.com/name/nm%s/' % imdbId
    data = getUrlUnicode(url)
    info = dict()
    info['name'] = findRe(data, u'<title>(.*?)</title>')
    # only the part before 'Additional Details' contains the filmography
    filmo = data.split(u'<h3>Additional Details</h3>')[0]
    movies = {}
    for part in filmo.split(u'<div class="filmo"')[1:]:
        section = findRe(part, u'a name=".*?">(.*?):</a></h5>')
        section = decodeHtml(section)
        movies[section] = re.compile(u'href="/title/tt(\d{7})/"').findall(part)
    info['movies'] = movies
    return info
if __name__ == '__main__':
    # command-line smoke test: guess the imdb id for a given title
    import sys
    #print parse(sys.argv[1])
    print "imdb:", guess(sys.argv[1])

View file

@ -1,84 +0,0 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
import re
from oxlib.cache import getUrlUnicode
from oxlib.html import stripTags
from oxlib.text import findRe
import imdb
def getData(id):
    '''
    Scrape title, year, imdb id and poster urls for an impawards id.

    >>> getData('1991/silence_of_the_lambs')['imdbId']
    u'0102926'
    >>> getData('1991/silence_of_the_lambs')['posters'][0]
    u'http://www.impawards.com/1991/posters/silence_of_the_lambs_ver1_xlg.jpg'
    >>> getData('1991/silence_of_the_lambs')['url']
    u'http://www.impawards.com/1991/silence_of_the_lambs_ver1.html'
    '''
    data = {
        'url': getUrl(id)
    }
    html = getUrlUnicode(data['url'])
    data['imdbId'] = findRe(html, 'imdb.com/title/tt(.*?) ')
    data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
    data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
    data['posters'] = []
    # id[5:] strips the 'YYYY/' prefix to match the per-movie page links
    results = re.compile('<a href = (%s.*?html)' % id[5:], re.DOTALL).findall(html)
    for result in results:
        result = result.replace('_xlg.html', '.html')
        url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
        html = getUrlUnicode(url)
        result = findRe(html, '<a href = (\w*?_xlg.html)')
        if result:
            # an extra-large version exists -- follow it for the big image
            url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
            html = getUrlUnicode(url)
            poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"'))
        else:
            poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)" alt='))
        data['posters'].append(poster)
    return data
def getId(url):
    """Derive the impawards id ('year/slug') from a poster page url."""
    parts = url.split('/')
    year = parts[3]
    # filename without '.html', split into underscore-separated tokens
    tokens = parts[4][:-5].split('_')
    # drop a trailing size marker ('xlg') and a trailing version marker ('verN')
    if tokens[-1] == 'xlg':
        tokens.pop()
    if findRe(tokens[-1], 'ver\d+$'):
        tokens.pop()
    return '%s/%s' % (year, '_'.join(tokens))
def getIds():
    """Walk all archive pages (newest first) and collect unique poster ids."""
    ids = []
    html = getUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60)
    # the latest page links to the highest archive page number
    pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
    for page in range(pages, 0, -1):
        for id in getIdsByPage(page):
            if not id in ids:
                ids.append(id)
    return ids
def getIdsByPage(page):
    """Scrape poster ids from one archive page.

    NOTE(review): returns a set, while getIds returns a list -- confirm
    callers do not rely on ordering here.
    """
    ids = []
    html = getUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1)
    results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
    for result in results:
        url = 'http://impawards.com/%s' % result
        ids.append(getId(url))
    return set(ids)
def getUrl(id):
    """Return the poster page url for an id, falling back to the _ver1 page."""
    url = "http://www.impawards.com/%s.html" % id
    html = getUrlUnicode(url)
    if findRe(html, "No Movie Posters on This Page"):
        url = "http://www.impawards.com/%s_ver1.html" % id
    return url
if __name__ == '__main__':
    # crawl the full archive and report all ids found
    ids = getIds()
    print sorted(ids), len(ids)

View file

@ -1,187 +0,0 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
import re
import urllib
from oxlib.cache import getUrl
from oxlib.html import decodeHtml, stripTags
from oxlib.text import findRe
from oxlib.text import findString
# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit
# request headers that make Apple's store endpoints answer as if we were
# the iTunes client (required to receive the XML storefront markup)
ITUNES_HEADERS = {
    'X-Apple-Tz': '0',
    'X-Apple-Storefront': '143441-1',
    'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
    'Accept-Language': 'en-us, en;q=0.50',
    'Accept-Encoding': 'gzip',
    'Connection': 'close',
}
def composeUrl(request, parameters):
    """Build the iTunes store url for a request type.

    request: 'advancedSearch' (parameters: media + title/artist or
    title/director), 'viewAlbum' or 'viewMovie' (parameters: id).
    Raises ValueError for an unknown request type.
    """
    if request == 'advancedSearch':
        url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
        if parameters['media'] == 'music':
            url += urllib.urlencode({
                'albumTerm': parameters['title'],
                'allArtistNames': parameters['artist'],
                'composerTerm': '',
                'flavor': 0,
                'genreIndex': 1,
                'media': 'music',
                'mediaType': 2,
                'ringtone': 0,
                'searchButton': 'submit',
                'songTerm': ''
            })
        elif parameters['media'] == 'movie':
            url += urllib.urlencode({
                'actorTerm': '',
                'closedCaption': 0,
                'descriptionTerm': '',
                'directorProducerName': parameters['director'],
                'flavor': 0,
                'media': 'movie',
                'mediaType': 3,
                'movieTerm': parameters['title'],
                'ratingIndex': 1,
                'releaseYearTerm': '',
                'searchButton': 'submit'
            })
    elif request == 'viewAlbum':
        url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
    elif request == 'viewMovie':
        url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
    else:
        # bug fix: an unknown request previously fell through and raised
        # NameError on the return below; fail with a clear error instead
        raise ValueError('unknown request type: %r' % request)
    return url
def parseXmlDict(xml):
    """Parse an Apple plist-style <key>/<value> xml fragment into a dict.

    Supports <true/>, <integer> and <string> values; anything else is
    kept as the raw text between the tags.
    """
    values = {}
    strings = xml.split('<key>')
    for string in strings:
        if string.find('</key>') != -1:
            key = findRe(string, '(.*?)</key>')
            type = findRe(string, '</key><(.*?)>')
            if type == 'true/':
                # self-closing <true/> encodes a boolean
                value = True
            else:
                value = findRe(string, '<%s>(.*?)</%s>' % (type, type))
                if type == 'integer':
                    value = int(value)
                elif type == 'string':
                    value = decodeHtml(value)
            values[key] = value
    return values
def parseCast(xml, title):
    """Extract the names listed under a cast section (e.g. 'actors').

    Returns whatever was collected so far if the section is missing or
    malformed.
    """
    names = []
    try:
        pattern = '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()
        # the last split chunk is trailing markup, not an entry
        chunks = findRe(xml, pattern).split('</GotoURL>')[:-1]
        for chunk in chunks:
            names.append(findRe(chunk, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
    except:
        pass
    return names
def parseMovies(xml, title):
    """Extract {'id', 'title'} dicts listed under a movie section.

    Returns whatever was collected so far if the section is missing or
    malformed.
    """
    movies = []
    try:
        pattern = '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()
        # the last split chunk is trailing markup, not an entry
        chunks = findRe(xml, pattern).split('</GotoURL>')[:-1]
        for chunk in chunks:
            movies.append({
                'id': findRe(chunk, 'viewMovie\?id=(.*?)&'),
                'title': findRe(chunk, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
            })
    except:
        pass
    return movies
class ItunesAlbum:
    """Scrapes album metadata from the iTunes store XML interface."""
    def __init__(self, id = '', title = '', artist = ''):
        # when no store id is given, resolve it from title/artist
        self.id = id
        self.title = title
        self.artist = artist
        if not id:
            self.id = self.getId()
    def getId(self):
        """Search the store and return the first matching album id."""
        url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
        xml = getUrl(url, headers = ITUNES_HEADERS)
        id = findRe(xml, 'viewAlbum\?id=(.*?)&')
        return id
    def getData(self):
        """Fetch and parse the album page; returns a metadata dict."""
        data = {'id': self.id}
        url = composeUrl('viewAlbum', {'id': self.id})
        xml = getUrl(url, None, ITUNES_HEADERS)
        data['albumName'] = findRe(xml, '<B>(.*?)</B>')
        data['artistName'] = findRe(xml, '<b>(.*?)</b>')
        data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
        data['genre'] = findRe(xml, 'Genre:(.*?)<')
        data['releaseDate'] = findRe(xml, 'Released(.*?)<')
        data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        data['tracks'] = []
        strings = findRe(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
        for string in strings:
            data['tracks'].append(parseXmlDict(string))
        data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<')
        return data
class ItunesMovie:
    """Scrapes movie metadata from the iTunes store XML interface."""
    def __init__(self, id = '', title = '', director = ''):
        # when no store id is given, resolve it from title/director
        self.id = id
        self.title = title
        self.director = director
        if not id:
            self.id = self.getId()
    def getId(self):
        """Search the store and return the first matching movie id."""
        url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
        xml = getUrl(url, headers = ITUNES_HEADERS)
        id = findRe(xml, 'viewMovie\?id=(.*?)&')
        return id
    def getData(self):
        """Fetch and parse the movie page; returns a metadata dict."""
        data = {'id': self.id}
        url = composeUrl('viewMovie', {'id': self.id})
        xml = getUrl(url, None, ITUNES_HEADERS)
        # bug fix: removed leftover debug code that dumped the response to
        # a hard-coded path under /Users/rolux and crashed on any other machine
        data['actors'] = parseCast(xml, 'actors')
        string = findRe(xml, 'Average Rating:(.*?)</HBoxView>')
        # full stars plus '&#189;' (half-star) entities
        data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
        data['directors'] = parseCast(xml, 'directors')
        data['format'] = findRe(xml, 'Format:(.*?)<')
        data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))
        data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
        data['producers'] = parseCast(xml, 'producers')
        data['rated'] = findRe(xml, 'Rated(.*?)<')
        data['relatedMovies'] = parseMovies(xml, 'related movies')
        data['releaseDate'] = findRe(xml, 'Released(.*?)<')
        data['runTime'] = findRe(xml, 'Run Time:(.*?)<')
        data['screenwriters'] = parseCast(xml, 'screenwriters')
        data['soundtrackId'] = findRe(xml, 'viewAlbum\?id=(.*?)&')
        data['trailerUrl'] = findRe(xml, 'autoplay="." url="(.*?)"')
        return data
if __name__ == '__main__':
    # smoke test: dump a known album and movie, then the related movies
    import simplejson
    data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
    print simplejson.dumps(data, sort_keys = True, indent = 4)
    data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
    print simplejson.dumps(data, sort_keys = True, indent = 4)
    for v in data['relatedMovies']:
        data = ItunesMovie(id = v['id']).getData()
        print simplejson.dumps(data, sort_keys = True, indent = 4)
    data = ItunesMovie(id='272960052').getData()
    print simplejson.dumps(data, sort_keys = True, indent = 4)

View file

@ -1,126 +0,0 @@
import re
from oxlib import cache
from oxlib.html import stripTags
from oxlib.text import findRe
import auth
def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
    """Cached fetch with the karagarga auth cookie attached.

    NOTE(review): the `valid` parameter is accepted but not forwarded to
    cache.getUrl -- confirm whether it should be.
    """
    headers = headers.copy()
    headers["Cookie"] = auth.get("karagarga.cookie")
    return cache.getUrl(url, data, headers, timeout)
def getUrlUnicode(url, timeout=cache.cache_timeout):
    """Cached unicode fetch routed through the authenticated _getUrl."""
    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
def getData(id):
    """Scrape all detail-page metadata for a karagarga torrent id.

    Returns a dict of scraped fields, or False if the torrent does not exist.
    """
    data = {
        "url": getUrl(id)
    }
    # &filelist=1 makes the page include the file listing table
    html = getUrlUnicode("%s%s" % (data["url"], "&filelist=1"))
    if 'No torrent with ID' in html:
        return False
    data['added'] = stripTags(parseTable(html, 'Added'))
    data['country'] = findRe(html, 'title="([\w ]*?)" border="0" width="32" height="20"')
    # data['description'] = parseTable(html, 'Description')
    data['director'] = stripTags(parseTable(html, 'Director / Artist'))
    data['files'] = []
    result = findRe(html, '<table class=main border="1" cellspacing=0 cellpadding="5">(.*?)</table>')
    results = re.compile('<td>(.*?)</td><td align="right">(.*?)</td>', re.DOTALL).findall(result)
    for name, size in results:
        data['files'].append({
            'name': name,
            'size': '%s %s' % (size[:-2], size[-2:].strip().upper())
        })
    # format is inferred from the genre icons on the page
    data['format'] = ''
    if html.find('genreimages/dvdr.png') != -1:
        data['format'] = 'DVD'
    elif html.find('genreimages/hdrip.png') != -1:
        data['format'] = 'HD'
    data['genre'] = []
    result = parseTable(html, 'Genres')
    for string in result.split('\n'):
        string = stripTags(findRe(string, '<a href="browse.php\?genre=.*?">(.*?)</a>'))
        if string:
            data['genre'].append(string)
    data['id'] = id
    data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
    data['language'] = stripTags(parseTable(html, 'Language'))
    data['leechers'] = int(findRe(html, 'seeder\(s\), (.*?) leecher\(s\)'))
    data['link'] = stripTags(parseTable(html, 'Internet Link'))
    data['links'] = []
    results = re.compile('<a href="(.*?)">(.*?)</a>', re.DOTALL).findall(parseTable(html, 'Description'))
    for (url, title) in results:
        if url.find('javascript') == -1:
            data['links'].append({
                'title': title,
                'url': url.replace('http://anonym.to/?', '')
            })
    data['people'] = 0
    result = stripTags(findRe(html, '<a href="top10others.php.*?>(.*?) people')).strip()
    if result:
        data['people'] = int(result)
    data['posters'] = []
    results = re.compile('<img border=0 src="(http://.*?)"', re.DOTALL).findall(html)
    for result in results:
        data['posters'].append(result)
    data['seeders'] = int(findRe(html, '#seeders" class="sublink".*?colspan=2>(.*?) seeder\(s\)'))
    data['size'] = int(findRe(parseTable(html, 'Size'), '\((.*?) ').replace(',', ''))
    data['snatched'] = int(findRe(html, '<a name="snatchers">.*?colspan=2>(.*?) '))
    data['subtitle'] = findRe(parseTable(html, 'Subtitles'), '>(.*?)<hr>').replace('included: ', '')
    data['subtitles'] = []
    results = re.compile('<a href="(.*?)">(.*?)</a>', re.DOTALL).findall(parseTable(html, 'Subtitles'))
    for (url, language) in results:
        data['subtitles'].append({
            'language': language.replace('click here for ', ''),
            'url': url
        })
    data['torrent'] = 'http://karagarga.net/%s' % findRe(html, '(down.php/.*?)"')
    data['year'] = stripTags(parseTable(html, 'Year'))
    # title is rendered as 'Director - Title (Year)'; strip both decorations
    data['title'] = stripTags(findRe(html, '<h1>(.*?)</h1>')).strip()
    data['title'] = re.sub('^%s - ' % re.escape(data['director']), '', data['title'])
    data['title'] = re.sub(' \(%s\)$' % re.escape(data['year']), '', data['title'])
    return data
def getId(url):
    """Return the torrent id: everything after the last '=' in the url."""
    return url.rsplit("=", 1)[-1]
def getTorrent(id):
    """Download the raw .torrent file for an id (requires the auth cookie)."""
    return _getUrl(getData(id)['torrent'])
def getIds(lastId = 20):
    """Collect torrent ids from the newest browse page backwards until
    lastId is seen; returns them sorted numerically, as strings.

    NOTE(review): loops forever if lastId never appears -- confirm callers
    always pass an existing id.
    """
    lastId = '%s' % lastId
    ids = []
    page = 0
    while True:
        for id in getIdsByPage(page):
            if not id in ids:
                ids.append(id)
        if lastId in ids:
            break
        page += 1
    return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
def getIdsByPage(page):
    """Scrape torrent ids from one browse page."""
    ids = []
    url = 'http://karagarga.net/browse.php?page=%s&cat=1&sort=added&d=DESC' % page
    html = getUrlUnicode(url, timeout = 23*60*60) # get new ids once per day
    strings = html.split('<td width="42" style="padding:0px;">')
    strings.pop(0)
    for string in strings:
        ids.append(findRe(string, '"details.php\?id=(.*?)"'))
    return ids
def getUrl(id):
    """Build the karagarga detail page url for a torrent id."""
    return "http://karagarga.net/details.php?id={0}".format(id)
def parseTable(html, title):
    """Return the raw html following the detail-table heading *title*."""
    # the Genres row spans a whole nested table; other rows end at their cell
    end = '</table>' if title == 'Genres' else '</td>'
    return findRe(html, '<td class="heading" [\w=" ]*?>%s</td>(.*?)%s' % (title, end))
if __name__ == "__main__":
    # smoke test against a known torrent id
    print getIds("79317")
    print getData("79317")

View file

@ -1,21 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from oxlib.cache import getUrl
from oxlib.html import decodeHtml
from oxlib.text import findRe
def getLyrics(title, artist):
    """Fetch lyrics for a song from the lyricsfly.com API.

    Obtains a temporary API key from the site, queries by artist/title,
    and returns the cleaned-up lyrics text.
    """
    html = getUrl('http://lyricsfly.com/api/')
    key = findRe(html, '<font color=green><b>(.*?)</b></font>')
    url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
    xml = getUrl(url)
    lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
    lyrics = lyrics.replace('\n', '').replace('\r', '')
    lyrics = lyrics.replace('[br]', '\n').strip()
    # bug fix: the result of this replace was previously discarded,
    # so triple blank lines were never collapsed
    lyrics = lyrics.replace('\n\n\n', '\n\n')
    lyrics = decodeHtml(lyrics.replace('&amp;', '&'))
    return lyrics
if __name__ == '__main__':
    # smoke test with a known song
    print getLyrics('Election Day', 'Arcadia')

View file

@ -1,45 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib import quote
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, decodeHtml, stripTags
def getMetacriticShowUrl(title):
    """Resolve a tv-show title to its metacritic url via the site search."""
    title = quote(title)
    url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
    data = getUrl(url)
    return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
def getData(title, url=None):
    """Fetch metascore and critic reviews for a tv show.

    Returns dict(score=..., critics=[...], url=...) or None when the title
    cannot be resolved to a metacritic url.
    """
    if not url:
        url = getMetacriticShowUrl(title)
    if not url:
        return None
    data = getUrlUnicode(url)
    score = findRe(data, 'ALT="Metascore: (.*?)"')
    if score:
        score = int(score)
    else:
        # no metascore on the page
        score = -1
    reviews = re.compile(
        '<div class="scoreandreview"><div class="criticscore">(.*?)</div>'
        '.*?<span class="publication">(.*?)</span>'
        '.*?<span class="criticname">(.*?)</span></div>'
        '.*?<div class="quote">(.*?)<br>'
        '.*?<a href="(.*?)" ', re.DOTALL).findall(data)
    metacritics = []
    for review in reviews:
        metacritics.append({
            'score': int(review[0]),
            'publication':review[1],
            'critic':decodeHtml(review[2]),
            'quote': stripTags(review[3]).strip(),
            'link': review[4],
        })
    return dict(score = score, critics = metacritics, url = url)

View file

@ -1,126 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import socket
from urllib import quote
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
def _parseResultsPage(data, max_results=10):
    """Parse a mininova results page into (title, link, '') tuples.

    Torrents on private trackers are skipped. At most max_results entries
    are returned (bug fix: the parameter was previously accepted but ignored).
    """
    results=[]
    regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
    for row in re.compile(regexp, re.DOTALL).findall(data):
        torrentDate = row[0]
        torrentExtra = row[1]
        torrentId = row[2]
        torrentTitle = decodeHtml(row[3]).strip()
        torrentLink = "http://www.mininova.org/tor/" + torrentId
        privateTracker = 'priv.gif' in torrentExtra
        if not privateTracker:
            results.append((torrentTitle, torrentLink, ''))
            if len(results) >= max_results:
                break
    return results
def findMovie(query, max_results=10):
    '''search for torrents on mininova
    '''
    # results are ordered by seed count
    url = "http://www.mininova.org/search/%s/seeds" % quote(query)
    data = getUrlUnicode(url)
    return _parseResultsPage(data, max_results)
def findMovieByImdb(imdbId):
    '''find torrents on mininova for a given imdb id
    '''
    # (removed an unused local `results` list that was never read)
    imdbId = normalizeImdbId(imdbId)
    data = getUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId)
    return _parseResultsPage(data)
def getId(mininovaId):
    """Extract the numeric mininova id from a bare id, path or full url."""
    mininovaId = unicode(mininovaId)
    # a '/<digits>' segment wins (e.g. '/tor/1072195')
    d = findRe(mininovaId, "/(\d+)")
    if d:
        return d
    mininovaId = mininovaId.split('/')
    if len(mininovaId) == 1:
        return mininovaId[0]
    else:
        return mininovaId[-1]
def exists(mininovaId):
    """True if the torrent exists and is not on a registration-only tracker."""
    mininovaId = getId(mininovaId)
    data = oxlib.net.getUrl("http://www.mininova.org/tor/%s" % mininovaId)
    if not data or 'Torrent not found...' in data:
        return False
    if 'tracker</a> of this torrent requires registration.' in data:
        return False
    return True
def getData(mininovaId):
    """Scrape all metadata for a mininova torrent.

    Returns a dict (including parsed torrent_info), or None when the
    torrent does not exist.
    """
    # normalize scraped field names to our keys
    _key_map = {
        'by': u'uploader',
    }
    mininovaId = getId(mininovaId)
    torrent = dict()
    torrent[u'id'] = mininovaId
    torrent[u'domain'] = 'mininova.org'
    torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
    torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
    torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
    # both pages contribute fields, so scrape their concatenation
    data = getUrlUnicode(torrent['comment_link']) + getUrlUnicode(torrent['details_link'])
    if '<h1>Torrent not found...</h1>' in data:
        return None
    for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
        key = d[0].lower().strip()
        key = _key_map.get(key, key)
        value = decodeHtml(stripTags(d[1].strip()))
        torrent[key] = value
    torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')
    torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
    torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
    if torrent['description']:
        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
    t = getUrl(torrent[u'torrent_link'])
    torrent[u'torrent_info'] = getTorrentInfo(t)
    return torrent
class Mininova(Torrent):
    '''
    Torrent subclass populated from a mininova torrent page.

    >>> Mininova('123')
    {}
    >>> Mininova('1072195')['infohash']
    '72dfa59d2338e4a48c78cec9de25964cddb64104'
    '''
    def __init__(self, mininovaId):
        self.data = getData(mininovaId)
        if not self.data:
            # unknown torrent: leave the Torrent dict empty
            return
        Torrent.__init__(self)
        # 'share ratio' reads like '123 seeds, 45 leechers'
        ratio = self.data['share ratio'].split(',')
        self['seeder'] = -1
        self['leecher'] = -1
        if len(ratio) == 2:
            val = intValue(ratio[0].replace(',','').strip())
            if val:
                self['seeder'] = int(val)
            val = intValue(ratio[1].replace(',','').strip())
            if val:
                self['leecher'] = int(val)
        val = intValue(self.data['downloads'].replace(',','').strip())
        if val:
            self['downloaded'] = int(val)
        else:
            self['downloaded'] = -1
        # drop the timezone suffix before parsing, e.g. ' +0200'
        published = self.data['added on']
        published = published.split(' +')[0]
        self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")

View file

@ -1,44 +0,0 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from oxlib.cache import getUrlUnicode
from oxlib import findRe
def getData(id):
    '''
    Collect poster urls for an imdb id.

    >>> getData('0060304')['posters'][0]
    u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
    >>> getData('0123456')['posters']
    []
    '''
    data = {
        "url": getUrl(id)
    }
    data["posters"] = getPostersByUrl(data["url"])
    return data
def getId(url):
    """Extract the imdb-id segment from a movieposterdb movie url."""
    parts = url.split("/")
    return parts[-2]
def getPostersByUrl(url, group=True):
    """Scrape poster image urls from a movieposterdb page.

    Descends one level into linked group pages when group=True.
    """
    posters = []
    html = getUrlUnicode(url)
    # the page embeds its own url only when the movie exists
    if url in html:
        if group:
            results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
            for result in results:
                posters += getPostersByUrl(result, False)
        results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
        for result in results:
            html = getUrlUnicode(result)
            posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
    return posters
def getUrl(id):
    """Build the movieposterdb movie page url for an imdb id."""
    return "http://www.movieposterdb.com/movie/{0}/".format(id)
if __name__ == '__main__':
    # smoke test with two known imdb ids
    print getData('0060304')
    print getData('0133093')

View file

@ -1,41 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import feedparser
from oxlib.cache import getUrl, getUrlUnicode
import oxlib
from oxlib import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
    """Search opensubtitles.org for srt subtitles for an imdb id.

    language may be a 2- or 3-letter code (converted to 3 letters).
    Returns an opensubtitles id string, or None if nothing matched.
    """
    if len(language) == 2:
        language = langCode2To3(language)
    elif len(language) != 3:
        language = langTo3Code(language)
    url = "http://www.opensubtitles.org/en/search/"
    if language:
        url += "sublanguageid-%s/" % language
    url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
    data = getUrl(url)
    if "title>opensubtitles.com - search results</title" in data:
        # multiple results: take the id from the first feed entry
        fd = feedparser.parse(data)
        opensubtitleId = None
        if fd.entries:
            link = fd.entries[0]['links'][0]['href']
            opensubtitleId = re.compile('subtitles/(.*?)/').findall(link)
            if opensubtitleId:
                opensubtitleId = opensubtitleId[0]
    else:
        # single result: the id is embedded in the page itself
        opensubtitleId = oxlib.findRe(data, '/en/subtitles/(.*?)/')
    return opensubtitleId
def downloadSubtitleById(opensubtitle_id):
    """Download all subtitle files for a subtitle page; returns {name: text}."""
    srts = {}
    data = getUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
    reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
    for f in re.compile(reg_exp, re.DOTALL).findall(data):
        name = oxlib.stripTags(f[1]).split('\n')[0]
        # NOTE(review): download host is .com while the page host is .org --
        # confirm this is intentional
        url = "http://www.opensubtitles.com%s" % f[0]
        srts[name] = getUrlUnicode(url)
    return srts

View file

@ -1,10 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache
def getPosterUrl(id):
    """Return the 0xdb.org poster url for an id, or '' when not cached."""
    url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
    if oxlib.cache.exists(url):
        return url
    return ''

View file

@ -1,12 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import oxlib.cache
from oxlib.cache import exists
def getPosterUrl(id):
    """Return the piratecinema.org poster url for an id, or '' when not cached."""
    # posters are sharded into directories by the first 4 characters of the id
    url = "http://piratecinema.org/posters/%s/%s.jpg" % (id[:4], id)
    if oxlib.cache.exists(url):
        return url
    return ''

View file

@ -1,34 +0,0 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from oxlib.cache import getHeaders, getUrl, getUrlUnicode
from oxlib import findRe, stripTags
def getUrlByImdb(imdb):
    """Resolve an imdb id to a rottentomatoes movie url, or None."""
    # this would also work but does not cache:
    '''
    from urllib2 import urlopen
    u = urlopen(url)
    return u.url
    '''
    url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
    data = getUrl(url)
    if "movie_title" in data:
        movies = re.compile('(/m/.*?/)').findall(data)
        if movies:
            return "http://www.rottentomatoes.com" + movies[0]
    return None
def getData(url):
    """Scrape title, year, synopsis and average rating from a movie page."""
    data = getUrlUnicode(url)
    r = {}
    r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
    if '(' in r['title']:
        # titles are rendered as 'Title (Year)'; split the year out
        r['year'] = findRe(r['title'], '\((\d*?)\)')
        r['title'] = re.sub('\((\d*?)\)', '', r['title']).strip()
    r['synopsis'] = findRe(data, '<span id="movie_synopsis_all".*?>(.*?)</span>')
    r['average rating'] = findRe(data, '<div id="bubble_allCritics".*?>(.*?)</div>').strip()
    return r

View file

@ -1,293 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import time
from BeautifulSoup import BeautifulSoup
import oxlib.cache
from oxlib.html import decodeHtml, stripTags
import oxlib.net
def getNews(year, month, day):
    """Scrape spiegel.de archive teasers for one day across all sections.

    Returns a list of dicts with date, description, imageUrl, section,
    title, title1, title2 and url.
    """
    sections = [
        'politik', 'wirtschaft', 'panorama', 'sport', 'kultur', 'netzwelt',
        'wissenschaft', 'unispiegel', 'schulspiegel', 'reise', 'auto'
    ]
    dt = datetime(year, month, day)
    day = int(dt.strftime('%j'))
    date = dt.strftime('%d.%m.%Y')
    news = []
    for section in sections:
        url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
        # today's page changes throughout the day, so bypass the cache for it
        if date == time.strftime('%d.%m.%Y', time.localtime()):
            html = oxlib.net.getUrl(url)
        else:
            html = oxlib.cache.getUrl(url)
        for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
            dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
            try:
                description = formatString(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
            except:
                description = ''
            try:
                imageUrl = re.compile('<img src="(.*?)"').findall(item)[0]
            except:
                imageUrl = ''
            try:
                title = formatString(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
            except:
                title = ''
            # only keep complete items ('Section: Title' with image and text)
            if dateString[:10] == date and description and imageUrl and title.find(': ') != -1:
                new = {}
                if len(dateString) == 10:
                    new['date'] = '%s-%s-%s 00:00' % (dateString[6:10], dateString[3:5], dateString[:2])
                else:
                    new['date'] = '%s-%s-%s %s:%s' % (dateString[6:10], dateString[3:5], dateString[:2], dateString[12:14], dateString[15:17])
                # fix decodeHtml
                # new['description'] = formatString(decodeHtml(description))
                new['description'] = formatString(description)
                new['imageUrl'] = imageUrl
                new['section'] = formatSection(section)
                new['title'] = formatString(title)
                new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(formatString(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
                if new['title1'][-1:] == ':':
                    new['title1'] = new['title1'][0:-1]
                new['title2'] = new['title'][len(new['title1']) + 2:]
                new['url'] = re.compile('<a href="(.*?)"').findall(item)[0]
                if new['url'][:1] == '/':
                    new['url'] = 'http://www.spiegel.de' + new['url']
                news.append(new)
                # print '%s, %s' % (new['section'], dateString)
    '''
    elif dateString[:10] == date and not description:
        print dateString + ' - no description'
    elif dateString[:10] == date and not imageUrl:
        print dateString + ' - no image'
    '''
    return news
def splitTitle(title):
    """Split a 'Head: Tail' title into [head, tail].

    head is everything before the first ': '; tail is everything after it.
    Raises IndexError when the title contains no ': ' separator.
    """
    head, tail = [re.compile(pattern).findall(title)[0]
                  for pattern in ('(.*?): ', ': (.*?)$')]
    return [head, tail]
def formatString(string):
    """Collapse layout whitespace and undo basic html entity escaping."""
    string = string.replace('<span class="spOptiBreak"> </span>', '')
    # NOTE(review): one of the space arguments below is presumably a
    # non-breaking space in the original source -- confirm the exact bytes
    string = string.replace('\n', ' ').replace(' ', ' ').strip()
    string = string.replace('&amp;', '&').replace('&apos;', '\'').replace('&quot;', '"')
    return string
def formatSection(string):
    """Capitalize the first letter and uppercase 'spiegel' in the rest."""
    head, tail = string[:1], string[1:]
    return head.upper() + tail.replace('spiegel', 'SPIEGEL')
def formatSubsection(string):
    """Map a spiegel.de url subsection slug to its display name.

    Unknown slugs are simply capitalized.
    """
    # SPIEGEL, SPIEGEL special
    subsection = {
        'abi': 'Abi - und dann?',
        'formel1': 'Formel 1',
        'jobundberuf': 'Job & Beruf',
        'leben': 'Leben U21',
        'mensch': 'Mensch & Technik',
        'sonst': '',
        'staedte': u'St\xc3dte',
        'ussports': 'US-Sports',
        'wunderbar': 'wunderBAR'
    }
    # `in` replaces the Python-2-only dict.has_key and behaves identically
    if string in subsection:
        return subsection[string].replace(u'\xc3', 'ae')
    return string[:1].upper() + string[1:]
def getIssue(year, week):
    """Collect cover, table of contents and page-image urls for a print issue.

    Returns None when the issue's cover image does not exist.
    """
    coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
    if not oxlib.net.exists(coverUrl):
        return None
    url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
    contents = []
    soup = BeautifulSoup(oxlib.cache.getUrl(url))
    for item in soup('a', {'href': re.compile('http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=')}):
        item = str(item)
        page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
        title = stripTags(item).strip()
        contents.append({'title': title, 'page': page})
    pageUrl = {}
    # probe a few pages beyond the last table-of-contents entry
    pages = page + 2
    for page in range(1, pages + 10):
        url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
        if oxlib.cache.exists(url):
            pageUrl[page] = url
        else:
            pageUrl[page] = ''
    return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}
def archiveIssues():
    '''
    this is just an example of an archiving application

    Walks issues from the current week backwards to 1994 and stores,
    per issue: contents json, contents txt, cover jpg and page jpgs.
    Prints running min/avg/max page counts.
    '''
    p = {}
    import os
    import simplejson
    import time
    archivePath = '/Volumes/Rolux Home/Desktop/Data/spiegel.de/Der Spiegel'
    localtime = time.localtime()
    year = int(time.strftime('%Y', localtime))
    week = int(time.strftime('%W', localtime))
    for y in range(year, 1993, -1):
        if y == year:
            wMax = week + 1
        else:
            wMax = 53
        for w in range(wMax, 0, -1):
            print 'getIssue(%d, %d)' % (y, w)
            issue = getIssue(y, w)
            if issue:
                dirname = '%s/%d/%02d' % (archivePath, y, w)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                filename = '%s/Der Spiegel %d %02d.json' % (dirname, y, w)
                if not os.path.exists(filename):
                    data = simplejson.dumps(issue, ensure_ascii = False)
                    f = open(filename, 'w')
                    f.write(data)
                    f.close()
                filename = '%s/Der Spiegel %d %02d.txt' % (dirname, y, w)
                if not os.path.exists(filename):
                    data = []
                    for item in issue['contents']:
                        data.append('%3d %s' % (item['page'], item['title']))
                    data = '\n'.join(data)
                    f = open(filename, 'w')
                    f.write(data)
                    f.close()
                filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
                if not os.path.exists(filename):
                    data = oxlib.cache.getUrl(issue['coverUrl'])
                    f = open(filename, 'w')
                    f.write(data)
                    f.close()
                for page in issue['pageUrl']:
                    url = issue['pageUrl'][page]
                    if url:
                        filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
                        if not os.path.exists(filename):
                            data = oxlib.cache.getUrl(url)
                            f = open(filename, 'w')
                            f.write(data)
                            f.close()
                # track min/avg/max page counts across all issues seen so far
                if not p:
                    p = {'num': 1, 'sum': issue['pages'], 'min': issue['pages'], 'max': issue['pages']}
                else:
                    p['num'] += 1
                    p['sum'] += issue['pages']
                    if issue['pages'] < p['min']:
                        p['min'] = issue['pages']
                    if issue['pages'] > p['max']:
                        p['max'] = issue['pages']
                print p['min'], p['sum'] / p['num'], p['max']
def archiveNews():
    '''
    this is just an example of an archiving application

    Walks days from yesterday back to 2000 and stores, per teaser:
    json, txt and image files. Afterwards prints per-section counts and
    any titles whose colon-split disagrees with title1/title2.
    '''
    import os
    import simplejson
    import time
    count = {}
    colon = []
    archivePath = '/Volumes/Rolux Home/Desktop/Data/spiegel.de/Spiegel Online'
    days = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    localtime = time.localtime()
    year = int(time.strftime('%Y', localtime))
    month = int(time.strftime('%m', localtime))
    day = int(time.strftime('%d', localtime)) - 1
    for y in range(year, 1999, -1):
        if y == year:
            mMax = month
        else:
            mMax = 12
        for m in range(mMax, 0, -1):
            if y == year and m == month:
                dMax = day
            elif m == 2 and y % 4 == 0 and y % 400 != 0:
                # leap-year february
                dMax = days[m] + 1
            else:
                dMax = days[m]
            for d in range(dMax, 0, -1):
                print 'getNews(%d, %d, %d)' % (y, m, d)
                news = getNews(y, m ,d)
                for new in news:
                    dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    if new['url'][-5:] == '.html':
                        filename = dirname + '/' + new['url'].split('/')[-1][:-5] + '.json'
                    else:
                        filename = dirname + '/' + new['url'] + '.json'
                    if not os.path.exists(filename) or True:
                        data = simplejson.dumps(new, ensure_ascii = False)
                        f = open(filename, 'w')
                        f.write(data)
                        f.close()
                    filename = filename[:-5] + '.txt'
                    if not os.path.exists(filename) or True:
                        data = splitTitle(new['title'])
                        data.append(new['description'])
                        data = '\n'.join(data)
                        f = open(filename, 'w')
                        f.write(data)
                        f.close()
                    filename = dirname + '/' + new['imageUrl'].split('/')[-1]
                    if not os.path.exists(filename):
                        data = oxlib.cache.getUrl(new['imageUrl'])
                        f = open(filename, 'w')
                        f.write(data)
                        f.close()
                    # tally per-section (and subsection) counts
                    strings = new['url'].split('/')
                    string = strings[3]
                    if len(strings) == 6:
                        string += '/' + strings[4]
                    if not count.has_key(string):
                        count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
                    else:
                        count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
                    strings = splitTitle(new['title'])
                    if strings[0] != new['title1'] or strings[1] != new['title2']:
                        colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
    for key in sortDictByKey(count):
        print '%6d %-24s %s' % (count[key]['count'], key, count[key]['string'])
    for value in colon:
        print value
def sortDictByKey(d):
    """Return the keys of *d* as a sorted list.

    Replaces the manual keys()/sort() dance with the builtin `sorted`,
    which iterates the dict's keys and returns a new sorted list.
    """
    return sorted(d)
if __name__ == '__main__':
    # Script entry point.  Earlier one-off crawls are kept around commented
    # out; only the full news-archive run is currently active.
    # spiegel = Spiegel(2008, 8)
    # print spiegel.getContents()
    # news = News(2001, 9, 10)
    # output(news.getNews())
    '''
    x = []
    for d in range(10, 30):
        print '2/%d' % d
        news = getNews(2008, 2, d)
        for new in news:
            strings = new['url'].split('/')
            string = formatSection(strings[3])
            if len(strings) == 6:
                string += '/' + formatSubsection(strings[4])
            if not string in x:
                x.append(string)
    print x
    '''
    # archiveIssues()
    archiveNews()

View file

@ -1,122 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import socket
from urllib import quote, urlencode
from urllib2 import URLError
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from oxlib.normalize import normalizeImdbId
import oxlib
from torrent import Torrent
cache_timeout = 24*60*60 # cache search only for 24 hours
season_episode = re.compile("S..E..", re.IGNORECASE)
def _getUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None):
    # Wrapper around cache.getUrl that forces the site language to English
    # via a cookie.  `valid` is accepted only so the signature matches the
    # callback interface expected by cache.getUrlUnicode; it is ignored here.
    headers = headers.copy()
    headers['Cookie'] = 'language=en_EN'
    return cache.getUrl(url, data, headers, timeout)
def _getUrlUnicode(url, timeout=cache.cache_timeout):
    # Unicode fetch routed through _getUrl so the language cookie is set.
    return cache.getUrlUnicode(url, _getUrl=_getUrl, timeout=timeout)
def findMovies(query, max_results=10):
    """Search The Pirate Bay for `query`, keeping only Movies (category 201).

    Returns a list of (title, detail-page url, '') tuples, at most
    `max_results` long.  Follows "next page" links, fetching at most
    three result pages.
    """
    results = []
    # Search URL: /0/3/200 = first page, sorted by seeders, video category.
    next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
    # The scraping patterns are loop-invariant: compile them once instead
    # of recompiling on every result page.
    row_re = re.compile('''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>''', re.DOTALL)
    next_re = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>')
    page_count = 1
    while next and page_count < 4:
        page_count += 1
        url = next[0]
        if not url.startswith('http'):
            # "next" links are site-relative; make them absolute.
            if not url.startswith('/'):
                url = "/" + url
            url = "http://thepiratebay.org" + url
        data = _getUrlUnicode(url, timeout=cache_timeout)
        for row in row_re.findall(data):
            torrentType = row[0]
            torrentLink = "http://thepiratebay.org" + row[1]
            torrentTitle = decodeHtml(row[2])
            # 201 = Movies, 202 = Movie DVDR, 205 = TV Shows
            if torrentType in ['201']:
                results.append((torrentTitle, torrentLink, ''))
                if len(results) >= max_results:
                    return results
        next = next_re.findall(data)
    return results
def findMovieByImdb(imdb):
    """Search the piratebay for a movie given its IMDb id."""
    imdb_id = normalizeImdbId(imdb)
    return findMovies("tt" + imdb_id)
def getId(piratebayId):
    """Extract the numeric piratebay id from a bare id or a known URL form."""
    if piratebayId.startswith('http://torrents.thepiratebay.org/'):
        piratebayId = piratebayId.split('org/')[1]
    # Try both URL shapes in order; each successful match narrows the id.
    for pattern in ("tor/(\d+)", "torrent/(\d+)"):
        match = findRe(piratebayId, pattern)
        if match:
            piratebayId = match
    return piratebayId
def exists(piratebayId):
    # Check (via an HTTP request) whether a torrent page exists for this id.
    piratebayId = getId(piratebayId)
    return oxlib.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
def getData(piratebayId):
    # Scrape a piratebay torrent detail page into a plain dict.
    # Returns None when the page has no parseable title (e.g. unknown id).
    # Maps the site's detail labels onto the keys used by the Torrent class.
    _key_map = {
        'spoken language(s)': u'language',
        'texted language(s)': u'subtitle language',
        'by': u'uploader',
        'leechers': 'leecher',
        'seeders': 'seeder',
    }
    piratebayId = getId(piratebayId)
    torrent = dict()
    torrent[u'id'] = piratebayId
    torrent[u'domain'] = 'thepiratebay.org'
    torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId
    data = _getUrlUnicode(torrent['comment_link'])
    torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
    if not torrent[u'title']:
        # No title means the page did not render a torrent - treat as missing.
        return None
    torrent[u'title'] = decodeHtml(torrent[u'title']).strip()
    torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
    title = quote(torrent['title'].encode('utf-8'))
    torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, title)
    # Detail fields are rendered as <dt>label:</dt><dd>value</dd> pairs.
    for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
        key = d[0].lower().strip()
        key = _key_map.get(key, key)
        value = decodeHtml(stripTags(d[1].strip()))
        torrent[key] = value
    torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
    if torrent[u'description']:
        torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip()
    # Fetch the .torrent file itself and decode its metadata.
    t = _getUrl(torrent[u'torrent_link'])
    torrent[u'torrent_info'] = getTorrentInfo(t)
    return torrent
class Thepiratebay(Torrent):
    '''
    Torrent subclass backed by a scraped piratebay detail page.

    >>> Thepiratebay('123')
    {}

    >>> Thepiratebay('3951349')['infohash']
    '4e84415d36ed7b54066160c05a0b0f061898d12b'
    '''
    def __init__(self, piratebayId):
        self.data = getData(piratebayId)
        if not self.data:
            # Unknown id: stay an empty dict instead of raising.
            return
        Torrent.__init__(self)
        # Uploaded timestamps look like '2009-01-01 12:00:00 GMT' or carry a
        # ' +HH:MM' offset; strip the timezone suffix before parsing.
        published = self.data['uploaded']
        published = published.replace(' GMT', '').split(' +')[0]
        self['published'] = datetime.strptime(published, "%Y-%m-%d %H:%M:%S")

View file

@ -1,37 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from oxlib import intValue
class Torrent(dict):
    """Normalized torrent metadata dictionary.

    Subclasses scrape a site into ``self.data`` and then call
    ``Torrent.__init__``, which populates the dict with a fixed key set,
    supplying defaults for anything the scraper did not provide.
    """
    _string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
        'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
    _int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
    _dict_keys = ('torrent_info', )
    _list_keys = ()
    # Fallback payload used when a subclass has not set self.data.
    data = {'torrent_info': {}}

    def __init__(self):
        self.update((key, self.data.get(key, u'')) for key in self._string_keys)
        self.update((key, self.data.get(key, {})) for key in self._dict_keys)
        self.update((key, self.data.get(key, [])) for key in self._list_keys)
        for key in self._int_keys:
            raw = self.data.get(key, -1)
            # Scraped counts may arrive as formatted strings; coerce them.
            self[key] = raw if isinstance(raw, int) else int(intValue(raw))
        # Fields derived from the decoded .torrent metadata override the
        # scraped values (notably 'size').
        info = self.data['torrent_info']
        self['infohash'] = info.get('hash', '')
        self['size'] = info.get('size', -1)
        self['announce'] = info.get('announce', '')
        # Single-file torrents carry no 'files' list in their info dict.
        self['files'] = len(info['files']) if 'files' in info else 1

View file

@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from oxlib import stripTags, findRe
from oxlib.cache import getUrlUnicode
def getEpisodeData(url):
    '''
    Parses information from a tv.com episode page.
    Returns a dict with description, show, title and episode score, plus
    season, episode and air date when the page provides them.

    example:
        getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
    '''
    data = getUrlUnicode(url)
    r = {}
    # First chunk of the main column (up to the first carriage return) is
    # the episode synopsis.
    r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
    r['show'] = findRe(data, '<h1>(.*?)</h1>')
    r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>')
    #episode score
    r['episode score'] = findRe(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')
    match = re.compile('Episode Number: (\d*?) &nbsp;&nbsp; Season Num: (\d*?) &nbsp;&nbsp; First Aired: (.*?) &nbsp').findall(data)
    if match:
        r['season'] = int(match[0][1])
        r['episode'] = int(match[0][0])
        #'Wednesday September 29, 2004' -> 2004-09-29
        r['air date'] = time.strftime('%Y-%m-%d', time.strptime(match[0][2], '%A %B %d, %Y'))
    return r

View file

@ -1,121 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from urllib import urlencode
import simplejson
from oxlib.cache import getUrlUnicode
from oxlib import findRe, decodeHtml
def getId(url):
    """Return the page name, i.e. the last path component of a wiki URL."""
    return url.rsplit("/", 1)[-1]
def getUrl(id):
    """Build the canonical English Wikipedia article URL for a page name."""
    return "http://en.wikipedia.org/wiki/" + id
def getMovieId(title, director='', year=''):
    """Look up a film's Wikipedia article URL via a site search.

    Returns '' when nothing matches.
    """
    query = '"%s" film %s %s' % (title, director, year)
    matches = find(query, 1)
    return matches[0][1] if matches else ''
def getUrlByImdbId(imdbId):
    """Find the Wikipedia article URL that mentions the given IMDb id."""
    matches = find('"%s"'% imdbId)
    if not matches:
        return ""
    return matches[0][1]
def getUrlByImdb(imdbId):
    # deprecated, use getUrlByImdbId(); kept for backwards compatibility
    return getUrlByImdbId(imdbId)
def getUrlByAllmovieId(allmovieId):
    """Find the Wikipedia article URL carrying the given Allmovie (amg) id."""
    matches = find('"amg_id = 1:%s"'% allmovieId)
    return matches[0][1] if matches else ''
def getWikiData(wikipediaUrl):
    """Fetch the raw wikitext source for an article URL."""
    # Rewrite the pretty /wiki/ URL into the index.php form and request
    # the raw (unrendered) article source.
    raw_url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
    return getUrlUnicode("%s&action=raw" % raw_url)
def getMovieData(wikipediaUrl):
    """Parse the film infobox of a Wikipedia article into a flat dict.

    Accepts a full article URL or a bare page name.  Known external ids
    (imdb, allmovie, rotten tomatoes, google video) and the DEFAULTSORT
    title are extracted from the article body as well.
    """
    if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
    data = getWikiData(wikipediaUrl)
    filmbox_data = findRe(data, '''\{\{Infobox.Film(.*?)\n\}\}''')
    filmbox = {}
    _box = filmbox_data.strip().split('\n|')
    if len(_box) == 1:
        # Some articles separate infobox rows with '|\n' instead.
        _box = _box[0].split('|\n')
    for row in _box:
        # Split on the first '=' only, so values that themselves contain
        # '=' (urls with query strings, nested templates) are kept intact
        # instead of being silently dropped.
        d = row.split('=', 1)
        if len(d) == 2:
            key = d[0].strip()
            # startswith avoids an IndexError on an empty key.
            if key.startswith('|'):
                key = key[1:]
            value = d[1].strip()
            filmbox[key] = value
    if 'imdb title' in data:
        filmbox['imdb_id'] = findRe(data, 'imdb title\|.*?(\d*?)\|')
    elif 'imdb episode' in data:
        filmbox['imdb_id'] = findRe(data, 'imdb episode\|.*?(\d*?)\|')
    if 'Amg movie' in data:
        filmbox['amg_id'] = findRe(data, 'Amg movie\|.*?(\d*?)\|')
    if 'amg_id' in filmbox and filmbox['amg_id'].startswith('1:'):
        filmbox['amg_id'] = filmbox['amg_id'][2:]
    if 'rotten-tomatoes' in data:
        filmbox['rottentomatoes_id'] = findRe(data, 'rotten-tomatoes\|id\=(.*?)\|')
        if not filmbox['rottentomatoes_id']:
            filmbox['rottentomatoes_id'] = findRe(data, 'rotten-tomatoes\|(.*?)\|')
    if 'google video' in data:
        filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)\|')
    if 'DEFAULTSORT' in data:
        filmbox['title_sort'] = findRe(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
    return filmbox
def getImageUrl(name):
    """Resolve a wiki image name to its upload.wikimedia.org file URL."""
    page = getUrlUnicode('http://en.wikipedia.org/wiki/Image:' + name)
    return findRe(page, 'href="(http://upload.wikimedia.org/.*?)"')
def getPosterUrl(wikipediaUrl):
    """Return the article's poster image URL, or '' if it has none."""
    if not wikipediaUrl.startswith('http'):
        wikipediaUrl = getUrl(wikipediaUrl)
    movie = getMovieData(wikipediaUrl)
    return getImageUrl(movie['image']) if 'image' in movie else ''
def getMoviePoster(wikipediaUrl):
    # deprecated, use getPosterUrl(); kept for backwards compatibility
    return getPosterUrl(wikipediaUrl)
def getAllmovieId(wikipediaUrl):
    """Return the Allmovie (amg) id stored in the article's infobox, or ''."""
    movie = getMovieData(wikipediaUrl)
    return movie.get('amg_id', '')
def find(query, max_results=10):
    """Full-text search via the MediaWiki API.

    Returns up to max_results (title, article url, '') tuples.
    """
    from oxlib.cache import getUrl
    params = {'action': 'query', 'list': 'search', 'format': 'json',
              'srlimit': max_results, 'srwhat': 'text',
              'srsearch': query.encode('utf-8')}
    url = "http://en.wikipedia.org/w/api.php?" + urlencode(params)
    data = getUrl(url)
    if not data:
        # Empty cached response: bypass the cache and retry once.
        data = getUrl(url, timeout=0)
    response = simplejson.loads(data)
    matches = []
    if response and 'query' in response:
        for entry in response['query']['search']:
            title = entry['title']
            page = "http://en.wikipedia.org/wiki/%s" % title.replace(' ', '_')
            matches.append((title, page, ''))
    return matches

View file

@ -1,99 +0,0 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from urllib import quote, unquote
import httplib
import xml.etree.ElementTree as ET
import re
import feedparser
from oxlib.cache import getUrl, getUrlUnicode
from oxlib import findString, findRe
def getVideoKey(youtubeId):
    """Return the playback token for a video id, or False if unavailable."""
    data = getUrl("http://www.youtube.com/get_video_info?&video_id=%s" % youtubeId)
    tokens = re.compile("token=(.+)&thumbnail").findall(data)
    return unquote(tokens[0]) if tokens else False
def getVideoUrl(youtubeId, format='mp4'):
    """Build a direct video download URL for a youtube id.

    format: '720p' (fmt=22) or 'mp4' (fmt=18); any other value (e.g. 'flv')
    omits the fmt parameter and gets the site's default stream.
    """
    youtubeKey = getVideoKey(youtubeId)
    # Map the named formats onto youtube's numeric fmt codes; the two
    # original branches were identical apart from this number.
    fmt = {'720p': 22, 'mp4': 18}.get(format)
    if fmt is None:
        return "http://youtube.com/get_video.php?video_id=%s&t=%s" % (youtubeId, youtubeKey)
    return "http://youtube.com/get_video.php?video_id=%s&t=%s&fmt=%s" % (youtubeId, youtubeKey, fmt)
def getMovieInfo(youtubeId, video_url_base=None):
    # Fetch the GData Atom entry for a video and normalize it into a dict.
    url = "http://gdata.youtube.com/feeds/api/videos/%s" % youtubeId
    data = getUrl(url)
    fd = feedparser.parse(data)
    return getInfoFromAtom(fd.entries[0], video_url_base)
def getInfoFromAtom(entry, video_url_base=None):
    # Normalize a feedparser Atom entry into a flat info dict.
    # If video_url_base is given, video links point into that mirror;
    # otherwise youtube download URLs are generated per format.
    info = dict()
    info['title'] = entry['title']
    info['description'] = entry['description']
    info['author'] = entry['author']
    #info['published'] = entry['published_parsed']
    if 'media_keywords' in entry:
        info['keywords'] = entry['media_keywords'].split(', ')
    info['url'] = entry['links'][0]['href']
    # presumably findString extracts the id following "/watch?v=" in the
    # URL -- TODO confirm against oxlib.findString semantics
    info['id'] = findString(info['url'], "/watch?v=")
    info['thumbnail'] = "http://img.youtube.com/vi/%s/0.jpg" % info['id']
    if video_url_base:
        info['flv'] = "%s/%s.%s" % (video_url_base, info['id'], 'flv')
        info['mp4'] = "%s/%s.%s" % (video_url_base, info['id'], 'mp4')
    else:
        info['flv'] = getVideoUrl(info['id'], 'flv')
        info['mp4'] = getVideoUrl(info['id'], 'mp4')
        info['720p'] = getVideoUrl(info['id'], '720p')
    info['embed'] = '<object width="425" height="355"><param name="movie" value="http://www.youtube.com/v/%s&hl=en"></param><param name="wmode" value="transparent"></param><embed src="http://www.youtube.com/v/%s&hl=en" type="application/x-shockwave-flash" wmode="transparent" width="425" height="355"></embed></object>' % (info['id'], info['id'])
    return info
def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
    """Search youtube via the GData API; return normalized info dicts."""
    url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (quote(query), orderBy, offset, max_results)
    feed = feedparser.parse(getUrlUnicode(url))
    videos = []
    for entry in feed.entries:
        videos.append(getInfoFromAtom(entry, video_url_base))
        if len(videos) >= max_results:
            break
    return videos
'''
def find(query, max_results=10, offset=1, orderBy='relevance', video_url_base=None):
url = "http://youtube.com/results?search_query=%s&search=Search" % quote(query)
data = getUrlUnicode(url)
regx = re.compile(' <a href="/watch.v=(.*?)" title="(.*?)" ')
regx = re.compile('<a href="/watch\?v=(\w*?)" ><img src="(.*?)" class="vimg120" title="(.*?)" alt="video">')
id_title = regx.findall(data)
data_flat = data.replace('\n', ' ')
videos = {}
for video in id_title:
vid = video[0]
if vid not in videos:
v = dict()
v['id'] = vid
v['link'] = "http//youtube.com/watch.v=%s" % v['id']
v['title'] = video[2].strip()
if video_url_base:
v['video_link'] = "%s/%s" % (video_url_base, v['id'])
else:
v['video_url'] = getVideoUrl(v['id'])
v['description'] = findRe(data, 'BeginvidDesc%s">(.*?)</span>' % v['id']).strip().replace('<b>', ' ').replace('</b>', '')
v['thumbnail'] = video[1]
videos[vid] = v
if len(videos) >= max_results:
return videos.values()
return videos.values()
'''