Switch to python3

This commit is contained in:
j 2014-09-30 18:15:32 +02:00
commit 9ba4b6a91a
5286 changed files with 677347 additions and 576888 deletions

View file

@@ -0,0 +1,9 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
__version__ = '1.0.0'
from . import imdb
from . import wikipedia
from . import google
from . import piratecinema
from . import oxdb

View file

@@ -0,0 +1,20 @@
from ox.cache import read_url
import re
import lxml.html
def get_data(id):
info = {}
base = 'http://www.abebooks.com'
url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
    data = read_url(url, unicode=True)
urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
if urls:
details = '%s%s' % (base, urls[0])
data = read_url(details)
doc = lxml.html.document_fromstring(data)
for e in doc.xpath("//*[contains(@id, 'biblio')]"):
key = e.attrib['id'].replace('biblio-', '')
value = e.text_content()
if value and key not in ('bookcondition', 'binding'):
info[key] = value
return info
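A minimal usage sketch for the module above, assuming it is importable as ox.web.abebooks; the ISBN is only an illustrative placeholder:

from ox.web import abebooks  # assumed module path

info = abebooks.get_data('0140449132')  # placeholder ISBN
print(info.get('title'), info.get('publisher'))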

View file

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox import strip_tags, find_re
from ox.cache import read_url
def get_id(url):
return url.split("/")[-1]
def get_data(id):
'''
>>> get_data('129689')['cast'][1][1]
u'Marianne'
>>> get_data('129689')['credits'][0][0]
u'Jean-Luc Godard'
>>> get_data('129689')['posters'][0]
u'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg'
>>> get_data('129689')['rating']
u'4.5'
'''
if id.startswith('http'):
id = get_id(id)
data = {
"url": get_url(id)
}
html = read_url(data["url"], unicode=True)
data['aka'] = parse_list(html, 'AKA')
data['category'] = find_re(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
data['countries'] = parse_list(html, 'countries')
data['director'] = parse_entry(html, 'directed by')
data['genres'] = parse_list(html, 'genres')
data['keywords'] = parse_list(html, 'keywords')
data['posters'] = [find_re(html, '<img src="(http://cps-.*?)"')]
data['produced'] = parse_list(html, 'produced by')
data['rating'] = find_re(html, 'Stars" title="(.*?) Stars"')
data['released'] = parse_entry(html, 'released by')
data['releasedate'] = parse_list(html, 'release date')
data['runtime'] = parse_entry(html, 'run time').replace('min.', '').strip()
data['set'] = parse_entry(html, 'set in')
data['synopsis'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
data['themes'] = parse_list(html, 'themes')
data['types'] = parse_list(html, 'types')
data['year'] = find_re(html, '<span class="year">.*?(\d+)')
#data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
#html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
#data['cast'] = parse_table(html)
#html = read_url("http://allmovie.com/work/%s/credits" % id, unicode=True)
#data['credits'] = parse_table(html)
html = read_url("http://allmovie.com/work/%s/review" % id, unicode=True)
data['review'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
return data
def get_url(id):
return "http://allmovie.com/work/%s" % id
def parse_entry(html, title):
html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
return strip_tags(html).strip()
def parse_list(html, title):
html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
    r = list(map(strip_tags, re.compile('<li>(.*?)</li>', re.DOTALL).findall(html)))
if not r and html:
r = [strip_tags(html)]
return r
def parse_table(html):
return [
[
strip_tags(r).strip().replace('&nbsp;', '')
for r in x.split('<td width="305">-')
]
for x in find_re(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
]
def parse_text(html, title):
return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
if __name__ == '__main__':
    print(get_data('129689'))
    # print(get_data('177524'))

View file

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import quote
from ox import find_re, strip_tags, decode_html
from ox.cache import read_url
import lxml
def findISBN(title, author):
q = '%s %s' % (title, author)
url = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Dstripbooks&field-keywords=" + "%s&x=0&y=0" % quote(q)
data = read_url(url, unicode=True)
links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
    id = find_re(links[0], '/dp/(.*?)/')
data = get_data(id)
if author in data['authors']:
return data
return {}
def get_data(id):
url = "http://www.amazon.com/title/dp/%s/" % id
data = read_url(url, unicode=True)
def find_data(key):
return find_re(data, '<li><b>%s:</b>(.*?)</li>'% key).strip()
r = {}
r['amazon'] = url
r['title'] = find_re(data, '<span id="productTitle" class="a-size-large">(.*?)</span>')
r['authors'] = []
doc = lxml.html.document_fromstring(data)
for e in doc.xpath("//span[contains(@class, 'author')]"):
        print(e)
for secondary in e.xpath(".//span[contains(@class, 'a-color-secondary')]"):
if 'Author' in secondary.text:
author = e.xpath(".//span[contains(@class, 'a-size-medium')]")
if author:
r['authors'].append(author[0].text.strip())
else:
r['authors'].append(e.xpath('.//a')[0].text.strip())
break
elif 'Translator' in secondary.text:
r['translator'] = [e.xpath('.//a')[0].text]
break
r['publisher'] = find_data('Publisher')
r['language'] = find_data('Language')
r['isbn-10'] = find_data('ISBN-10')
r['isbn-13'] = find_data('ISBN-13').replace('-', '')
r['dimensions'] = find_re(data, '<li><b>.*?Product Dimensions:.*?</b>(.*?)</li>')
r['pages'] = find_data('Paperback')
if not r['pages']:
r['pages'] = find_data('Hardcover')
r['review'] = strip_tags(find_re(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()
for e in doc.xpath('//noscript'):
for c in e.getchildren():
if c.tag == 'div':
r['description'] = strip_tags(decode_html(lxml.html.tostring(c))).strip()
break
r['cover'] = re.findall('src="(.*?)" id="prodImage"', data)
if r['cover']:
r['cover'] = r['cover'][0].split('._BO2')[0]
if not r['cover'].endswith('.jpg'):
r['cover'] = r['cover'] + '.jpg'
if 'no-image-avail-img' in r['cover']:
del r['cover']
else:
del r['cover']
return r
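A usage sketch for findISBN/get_data above, assuming the module path ox.web.amazon; the title and author are placeholder queries:

from ox.web import amazon  # assumed module path

book = amazon.findISBN('Ulysses', 'James Joyce')  # placeholder query
if book:
    print(book['title'], book['isbn-10'])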

View file

@@ -0,0 +1,67 @@
import json
import re
from ox.cache import read_url
HEADERS = {
'User-Agent': 'iTunes/10.4 (Macintosh; Intel Mac OS X 10.7) AppleWebKit/534.48.3',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us, en;q=0.50',
'X-Apple-Store-Front': '143441-1,12',
'X-Apple-Tz': '7200',
'Accept-Encoding': 'gzip, deflate'
}
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
def get_movie_data(title, director):
    if isinstance(title, bytes):
        title = title.decode('utf-8')
    if isinstance(director, bytes):
        director = director.decode('utf-8')
data = {}
# itunes section (preferred source for link)
url = 'http://ax.search.itunes.apple.com/WebObjects/MZSearch.woa/wa/advancedSearch'
url += '?media=movie&movieTerm=' + title
url += '&actorNames=&directorProducerName=' + director
url += '&releaseYearTerm=&descriptionTerm=&genreIndex=1&ratingIndex=1'
HEADERS['Referer'] = url
html = read_url(url, headers=HEADERS, unicode=True)
regexp = '<a href="(http://itunes.apple.com/us/movie/.*?)" class="artwork-link"><div class="artwork">'
regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>'
results = re.compile(regexp).findall(html)
if results:
data['link'] = results[0][0]
data['poster'] = results[0][1].replace('140x140', '600x600')
html = read_url(data['link'], headers=HEADERS, unicode=True)
results = re.compile('video-preview-url="(.*?)"').findall(html)
if results:
data['trailer'] = results[0]
# trailers section (preferred source for poster and trailer)
host = 'http://trailers.apple.com'
url = host + '/trailers/home/scripts/quickfind.php?callback=searchCallback&q=' + title
js = json.loads(read_url(url, unicode=True)[16:-4])
results = js['results']
if results:
url = host + results[0]['location']
if not 'link' in data:
data['link'] = url
headers = {
'User-Agent': USER_AGENT
}
html = read_url(url, headers=headers, unicode=True)
results = re.compile('"(' + host + '.*?poster\.jpg)"').findall(html)
if results:
data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg')
html = read_url(url + 'includes/playlists/web.inc', headers=headers, unicode=True)
results = re.compile('"(' + host + '\S+\.mov)"').findall(html)
if results:
data['trailer'] = results[-1]
return data
if __name__ == '__main__':
    print(get_movie_data('Alphaville', 'Jean-Luc Godard'))
    print(get_movie_data('Sin City', 'Robert Rodriguez'))
    print(get_movie_data('Breathless', 'Jean-Luc Godard'))
    print(get_movie_data('Capitalism: A Love Story', 'Michael Moore'))
    print(get_movie_data('Film Socialisme', 'Jean-Luc Godard'))

View file

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from .. import cache
from ..utils import json
def get_id(url):
return url.split("/")[-1]
def get_url(id):
return "http://www.archive.org/details/%s" % id
def get_data(id):
data = {}
url = get_url(id)
    details = cache.read_url('%s?output=json' % url, unicode=True)
details = json.loads(details)
for key in ('title', 'description', 'runtime'):
data[key] = details['metadata'][key]
if isinstance(data[key], list):
data[key] = data[key][0]
data['url'] = url
data['image'] = 'http://archive.org/download/%s/format=thumbnail' % id
data['ogg'] = 'http://archive.org/download/%s/format=Ogg+video' % id
data['mp4'] = 'http://archive.org/download/%s/format=512Kb+MPEG4' % id
return data
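A usage sketch, assuming the module path ox.web.archive; the item id is a placeholder:

from ox.web import archive  # assumed module path

info = archive.get_data('night_of_the_living_dead')  # placeholder item id
print(info['title'], info['mp4'])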

View file

@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import json
import os
import re
from ox import find_re, strip_tags
from ox.cache import read_url
def get_data(id, language='en'):
if language == 'de':
url = 'http://films.arsenal-berlin.de/index.php/Detail/Object/Show/object_id/%d/lang/de_DE' % id
else:
url = 'http://films.arsenal-berlin.de/index.php/Detail/Object/Show/object_id/%d' % id
html = read_url(url, unicode=True)
if 'ID does not exist' in html:
return None
if 'Willkommen in der Datenbank des Arsenal' in html:
return None
data = {}
data[u'id'] = id
data[u'url'] = url
m = re.compile('<h1>(.*?)</h1>').findall(html)
if m:
data[u'title'] = m[0]
m = re.compile("<b>Director: </b><a href='.*?'>(.*?)</a>").findall(html)
if m:
data[u'director'] = m[0]
m = re.compile("caUI.initImageScroller\(\[\{url:'(.*?)'").findall(html)
if m:
data[u'image'] = m[0]
units = re.compile("<div class='unit'>(.*?)</div>", re.DOTALL).findall(html)
for x in map(re.compile('<b>(.*?)</b>: (.*)', re.DOTALL).findall, units):
if x:
#data[x[0][0].lower()] = strip_tags(x[0][1])
key = x[0][0].lower()
data[key] = x[0][1]
if key == "forum catalogue pdf":
data[key] = find_re(data[key], '"(http:.*?)"')
else:
data[key] = strip_tags(data[key])
if "running time (minutes)" in data:
data[u'runtime'] = float(data.pop("running time (minutes)").replace(',', '.')) * 60
for key in ('year', 'length in metres', 'forum participation year', 'number of reels'):
if key in data and data[key].isdigit():
data[key] = int(data[key])
return data
def backup(filename):
if os.path.exists(filename):
with open(filename) as f:
data = json.load(f)
else:
data = {}
    start = max(map(int, data)) if data else 1
for i in range(start, 11872):
info = get_data(i)
if info:
data[i] = info
if len(data) % 10 == 0:
                print('save', filename, len(data))
with open(filename, 'w') as f:
json.dump(data, f)
else:
            print('ignore', i)
with open(filename, 'w') as f:
json.dump(data, f)
return data
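A usage sketch, assuming the module path ox.web.arsenalberlin; the object id and filename below are placeholders:

from ox.web import arsenalberlin  # assumed module path

info = arsenalberlin.get_data(123)  # placeholder object id
if info:
    print(info['title'], info.get('runtime'))
arsenalberlin.backup('arsenal.json')  # crawls ids up to 11871, resuming from the highest saved id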

View file

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
# GPL 2009
import os
from ox.utils import json
def get(key):
user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
auth = {}
if os.path.exists(user_auth):
f = open(user_auth, "r")
data = f.read()
f.close()
auth = json.loads(data)
if key in auth:
return auth[key]
print "please add key %s to json file '%s'" % (key, user_auth)
raise Exception,"no key %s found" % key
def update(key, value):
user_auth = os.environ.get('oxAUTH', os.path.expanduser('~/.ox/auth.json'))
auth = {}
if os.path.exists(user_auth):
f = open(user_auth, "r")
data = f.read()
f.close()
auth = json.loads(data)
auth[key] = value
f = open(user_auth, "w")
f.write(json.dumps(auth, indent=2))
f.close()
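A usage sketch, assuming the module path ox.web.auth; the key name and value are illustrative:

from ox.web import auth  # assumed module path

auth.update('example.username', 'me')  # placeholder key/value
print(auth.get('example.username'))    # raises if the key is missing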

View file

@@ -0,0 +1,100 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import ox.cache
from ox.cache import read_url
from ox.html import strip_tags
from ox.text import find_re
import imdb
def get_id(url):
return url.split("/")[-1]
def get_url(id):
return "http://www.criterion.com/films/%s" % id
def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
'''
>>> get_data('1333').get('imdbId')
u'0060304'
>>> get_data('236')['posters'][0]
u'http://s3.amazonaws.com/criterion-production/release_images/1586/ThirdManReplace.jpg'
>>> get_data('786')['posters'][0]
u'http://s3.amazonaws.com/criterion-production/product_images/185/343_box_348x490.jpg'
'''
data = {
"url": get_url(id)
}
try:
html = read_url(data["url"], timeout=timeout, unicode=True)
except:
html = ox.cache.read_url(data["url"], timeout=timeout)
data["number"] = find_re(html, "<li>Spine #(\d+)")
data["title"] = find_re(html, "<h1 class=\"movietitle\">(.*?)</h1>")
data["title"] = data["title"].split(u' \u2014 The Television Version')[0]
data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>"))
results = find_re(html, '<div class="left_column">(.*?)</div>')
results = re.compile("<li>(.*?)</li>").findall(results)
data["country"] = results[0]
data["year"] = results[1]
data["synopsis"] = strip_tags(find_re(html, "<div class=\"content_block last\">.*?<p>(.*?)</p>"))
result = find_re(html, "<div class=\"purchase\">(.*?)</div>")
if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
r = re.compile('<h3 class="section_title first">Other Editions</h3>(.*?)</div>', re.DOTALL).findall(html)
if r:
result = r[0]
result = find_re(result, "<a href=\"(.*?)\"")
if not "/boxsets/" in result:
data["posters"] = [result]
else:
html_ = read_url(result, unicode=True)
result = find_re(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
result = find_re(result, "src=\"(.*?)\"")
if result:
data["posters"] = [result.replace("_w100", "")]
else:
data["posters"] = []
data['posters'] = [re.sub('(\?\d+)$', '', p) for p in data['posters']]
result = find_re(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
if result:
data["stills"] = [result]
data["trailers"] = []
else:
data["stills"] = filter(lambda x: x, [find_re(html, "\"thumbnailURL\", \"(.*?)\"")])
data["trailers"] = filter(lambda x: x, [find_re(html, "\"videoURL\", \"(.*?)\"")])
if timeout == ox.cache.cache_timeout:
timeout = -1
if get_imdb:
# removed year, as "title (year)" may fail to match
data['imdbId'] = imdb.get_movie_id(data['title'], data['director'], timeout=timeout)
return data
def get_ids(page=None):
ids = []
if page:
url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
        html = read_url(url, unicode=True)
results = re.compile("films/(\d+)").findall(html)
ids += results
results = re.compile("boxsets/(.*?)\"").findall(html)
for result in results:
html = read_url("http://www.criterion.com/boxsets/" + result)
results = re.compile("films/(\d+)").findall(html)
ids += results
return set(ids)
html = read_url("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine", unicode=True)
results = re.compile("\&amp;p=(\d+)\&").findall(html)
pages = max(map(int, results))
for page in range(1, pages):
ids += get_ids(page)
return sorted(set(ids), key=int)
if __name__ == '__main__':
    print(get_ids())

View file

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves.urllib.parse import unquote
from ox.cache import read_url
def get_video_url(url):
'''
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'
>>> get_video_url('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
'''
    data = read_url(url, unicode=True)
video = re.compile('''video", "(.*?)"''').findall(data)
for v in video:
v = unquote(v).split('@@')[0]
return v
return ''

View file

@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves import urllib
import ox
from ox import strip_tags, decode_html
from ox.cache import read_url
def find(query, timeout=ox.cache.cache_timeout):
if not isinstance(query, bytes):
query = query.encode('utf-8')
params = urllib.parse.urlencode({'q': query})
url = 'http://duckduckgo.com/html/?' + params
data = read_url(url, timeout=timeout).decode('utf-8')
results = []
regex = '<a .*?class="large" href="(.+?)">(.*?)</a>.*?<div class="snippet">(.*?)</div>'
for r in re.compile(regex, re.DOTALL).findall(data):
results.append((strip_tags(decode_html(r[1])), r[0], strip_tags(decode_html(r[2]))))
return results
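A usage sketch, assuming the module path ox.web.duckduckgo:

from ox.web import duckduckgo  # assumed module path

for title, url, snippet in duckduckgo.find('The Matrix site:imdb.com'):
    print(title, url)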

View file

@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from ox import strip_tags, find_re
from ox.cache import read_url
import google
def get_show_url(title):
    '''
    Search for the epguides.com URL of a show via its title.
    Uses Google to find the URL; epguides itself searches the same way.
    '''
for (name, url, desc) in google.find('allintitle: site:epguides.com %s' % title, 1):
if url.startswith('http://epguides.com'):
if re.search(title, name):
return url
return None
def get_show_data(url):
data = read_url(url, unicode=True)
r = {}
r['title'] = strip_tags(find_re(data, '<h1>(.*?)</h1>'))
r['imdb'] = find_re(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
r['episodes'] = {}
#1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear
for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
air_date = episode[3].strip()
#'22 Sep 04' -> 2004-09-22
try:
air_date = time.strftime('%Y-%m-%d', time.strptime(air_date, '%d %b %y'))
except:
pass
s = episode[1].split('-')[0].strip()
e = episode[1].split('-')[-1].strip()
try:
r['episodes']['S%02dE%02d' % (int(s), int(e))] = {
'prod code': episode[2],
'air date': air_date,
'url': episode[4],
'title':episode[5],
}
except:
print "oxweb.epguides failed,", url
return r
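A usage sketch chaining the two functions above, assuming the module path ox.web.epguides; the show title is a placeholder:

from ox.web import epguides  # assumed module path

url = epguides.get_show_url('The Wire')  # placeholder title
if url:
    show = epguides.get_show_data(url)
    print(show['title'], show['imdb'], len(show['episodes']))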

View file

@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import string
import subprocess
import ox
import os
def get_ids():
result = []
for i in string.ascii_uppercase:
url = "http://www.filmsdivision.org/search.php?title=%s" % i
        data = ox.cache.read_url(url, unicode=True)
links = re.compile('view_video.php\?movId=(.*?)[\'"]', re.DOTALL).findall(data)
result += links
return list(set(result))
def get_data(id):
result = {}
url = "http://www.filmsdivision.org/view_video.php?movId=%s" % id
    data = ox.cache.read_url(url, unicode=True)
result['title'] = re.compile('<td.*?class="vdoheadtxt".*?>(.*?)</td>').findall(data)[0]
result['year'] = re.compile('Release: (\d{4})').findall(data)[0]
result['duration'] = int(re.compile('Duration: (\d+)mins').findall(data)[0]) * 60
result['producer'] = re.compile('Producer: (.*?)\t').findall(data)[0].strip()
if 'Director:' in data:
result['director'] = re.compile('Director: (.*?)\t').findall(data)[0].strip()
else:
result['director'] = "Unknown Director"
result['url'] = re.compile('value="(.*?.wmv)"').findall(data)[0]
return result
def download_video(url, filename):
dirname = os.path.dirname(filename)
if not os.path.exists(dirname):
os.makedirs(dirname)
    p = subprocess.Popen(['gst-launch', 'mmssrc', 'location=%s' % url, '!', 'filesink', 'location=%s' % filename])
p.wait()
return p.returncode == 0
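A usage sketch, assuming the module path ox.web.filmsdivision and that gst-launch is installed for download_video; the output path is a placeholder:

from ox.web import filmsdivision  # assumed module path

ids = filmsdivision.get_ids()
info = filmsdivision.get_data(ids[0])
filmsdivision.download_video(info['url'], '/tmp/%s.wmv' % ids[0])  # requires gst-launch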

View file

@ -0,0 +1,74 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from lxml.html import document_fromstring
from ox.cache import read_url
from ox import find_re, strip_tags
from ox.web.imdb import ImdbCombined
def get_data(id, timeout=-1):
'''
>>> get_data('the-matrix')['poster']
'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
>>> get_data('0133093')['poster']
'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
>>> get_data('2-or-3-things-i-know-about-her')['poster']
'http://content6.flixster.com/movie/10/95/43/10954392_gal.jpg'
>>> get_data('0078875')['rottentomatoes_id']
'http://www.rottentomatoes.com/m/the-tin-drum/'
'''
if len(id) == 7:
try:
int(id)
id = get_id(imdb=id)
except:
pass
data = {
"url": get_url(id),
}
html = read_url(data['url'], timeout=timeout, unicode=True)
doc = document_fromstring(html)
props = {
'og:title': 'title',
'og:image': 'poster',
'og:url': 'rottentomatoes_id',
}
for meta in doc.head.findall('meta'):
prop = meta.attrib.get('property', None)
content = meta.attrib.get('content', '')
if prop in props and content:
data[props[prop]] = content
for p in doc.body.find_class('synopsis'):
data['synopsis'] = p.text.strip()
if 'poster' in data and data['poster']:
data['poster'] = data['poster'].replace('_pro.jpg', '_gal.jpg')
if not 'title' in data:
return None
return data
def get_id(url=None, imdb=None):
'''
>>> get_id(imdb='0133093')
u'the-matrix'
#>>> get_id(imdb='0060304')
#u'2-or-3-things-i-know-about-her'
'''
if imdb:
i = ImdbCombined(imdb)
title = i['title']
return title.replace(' ', '-').lower().replace("'", '')
return url.split('/')[-1]
def get_url(id):
return "http://www.flixster.com/movie/%s"%id

View file

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import json
from ox.cache import read_url
from ox import find_re
class Freebase(dict):
def __init__(self, id, timeout=-1):
url = "http://ids.freebaseapps.com/get_ids?id=/authority/imdb/title/tt%s" % id
'''
"http://graph.freebase.com/imdb.title.tt%s" % id
        might also be of interest at some point; right now it returns little info
'''
data = read_url(url, unicode=True)
try:
data = json.loads(data)
except ValueError:
return
'''
for key in data:
self[key] = data[key]
'''
for key in ('id', 'guid', 'name'):
self[key] = data[key]
keys = {
'wikipedia': '/wikipedia/en',
'netflix': '/authority/netflix/movie',
'nytimes': '/source/nytimes/movie',
'metacritic': '/source/metacritic/movie',
}
for key in keys:
            links = [x for x in data['ids'] if x['namespace'] == keys[key]]
if links:
self[key] = links[0]['uri']
if 'nytimes' in self:
self['nytimes'] = self['nytimes'].replace('_/overview', '%s/overview' % self['name'].replace(' ', '-'))
self['amgId'] = find_re(self['nytimes'], 'movie/(\d+)/')
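A usage sketch, assuming the module path ox.web.freebase; '0133093' is the IMDb id for The Matrix used elsewhere in this commit:

from ox.web.freebase import Freebase  # assumed module path

m = Freebase('0133093')  # The Matrix
print(m.get('wikipedia'), m.get('netflix'))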

View file

@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six.moves import urllib
import ox
from ox import strip_tags, decode_html
DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60
def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return ox.cache.read_url(url, data, headers, timeout, unicode=True)
def quote_plus(s):
if not isinstance(s, bytes):
s = s.encode('utf-8')
return urllib.parse.quote_plus(s)
def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
"""
Return max_results tuples with title, url, description
>>> find("The Matrix site:imdb.com", 1)[0][0]
u'The Matrix (1999) - IMDb'
>>> find("The Matrix site:imdb.com", 1)[0][1]
u'http://www.imdb.com/title/tt0133093/'
"""
results = []
offset = 0
while len(results) < max_results:
url = 'http://google.com/search?q=%s' % quote_plus(query)
if offset:
url += '&start=%d' % offset
data = read_url(url, timeout=timeout)
data = re.sub('<span class="f">(.*?)</span>', '\\1', data)
for a in re.compile('<a href="(htt\S+?)".*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>').findall(data):
results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
if len(results) >= max_results:
break
offset += 10
return results

View file

@@ -0,0 +1,821 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
import time
import unicodedata
from six.moves import urllib
from six import string_types
from .. import find_re, strip_tags, decode_html
from .. import cache
from . siteparser import SiteParser
from . import duckduckgo
from ..utils import datetime
from ..geo import normalize_country_name
def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
headers = headers.copy()
return cache.read_url(url, data, headers, timeout, unicode=unicode)
def get_url(id):
return "http://www.imdb.com/title/tt%s/" % id
class Imdb(SiteParser):
'''
>>> Imdb('0068646')['title']
u'The Godfather'
>>> Imdb('0133093')['title']
u'The Matrix'
'''
regex = {
'alternativeTitles': {
'page': 'releaseinfo',
're': [
'name="akas".*?<table.*?>(.*?)</table>',
"td>(.*?)</td>.*?<td>(.*?)</td>"
],
'type': 'list'
},
'aspectratio': {
'page': 'combined',
're': 'Aspect Ratio:</h5><div class="info-content">([\d\.]+)',
'type': 'float',
},
'budget': {
'page': 'business',
're': [
'<h5>Budget</h5>\s*?\$(.*?)<br',
lambda data: find_re(decode_html(data).replace(',', ''), '\d+')
],
'type': 'int'
},
'cast': {
'page': 'combined',
're': [
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
lambda ll: [strip_tags(l) for l in ll]
],
'type': 'list'
},
'cinematographer': {
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Cinematography by</a>(.*?)</table>',
'<a href="/name/.*?/">(.*?)</a>'
],
'type': 'list'
},
'connections': {
'page': 'trivia?tab=mc',
're': '<h4 class="li_group">(.*?)</h4>(.*?)(<\/div>\n <a|<script)',
'type': 'list'
},
'country': {
'page': 'combined',
're': [
'<div class="info"><h5>Country:</h5>.*?<div class="info">',
#'<a href="/country/.*?">(.*?)</a>', #links changed to work with existing caches, just take all links
'<a.*?>(.*?)</a>',
],
'type': 'list'
},
'creator': {
'page': 'combined',
're': [
'<h5>Creator.?:</h5>.*?<div class="info-content">(.*?)</div>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'director': {
'page': 'combined',
're': [
lambda data: data.split('<b>Series Crew</b>')[0],
'Directed by</a>(.*?)</table>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'_director': {
'page': 'combined',
're': [
'<h5>Director:</h5>.*?<div class="info-content">(.*?)</div>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'editor': {
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Film Editing by</a>(.*?)</table>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'composer': {
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Original Music by</a>(.*?)</table>',
'<a href="/name/.*?>(.*?)</a>'
],
'type': 'list'
},
'episodeTitle': {
'page': 'combined',
're': '<div id="tn15title">.*?<em>(.*?)</em>',
'type': 'string'
},
'filmingLocations': {
'page': 'locations',
're': [
'<a href="/search/title\?locations=.*?".*?>(.*?)</a>',
lambda data: data.strip(),
],
'type': 'list'
},
'genre': {
'page': 'combined',
're': [
'<h5>Genre:</h5>(.*?)<hr',
'<a href="/Sections/Genres/.*?/">(.*?)</a>'
],
'type': 'list'
},
'gross': {
'page': 'business',
're': [
'<h5>Gross</h5>\s*?\$(.*?)<br',
lambda data: find_re(data.replace(',', ''), '\d+')
],
'type': 'int'
},
'keyword': {
'page': 'keywords',
're': '<a href="/keyword/.*?>(.*?)</a>',
'type': 'list'
},
'language': {
'page': 'combined',
're': [
'<div class="info"><h5>Language:</h5>.*?<div class="info">',
#'<a href="/language/.*?">(.*?)</a>', #links changed to work with existing caches, just take all links
'<a.*?>(.*?)</a>',
],
'type': 'list'
},
'summary': {
'page': 'plotsummary',
're': '<p class="plotSummary">(.*?)<\/p>',
'type': 'string'
},
'posterId': {
'page': 'combined',
're': '/primary-photo/media/rm(.*?)/tt',
'type': 'string'
},
'posterIds': {
'page': 'posters',
're': '/unknown-thumbnail/media/rm(.*?)/tt',
'type': 'list'
},
'producer': {
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Produced by</a>(.*?)</table>',
'<a href="/name/.*?/">(.*?)</a>'
],
'type': 'list'
},
'productionCompany': {
'page': 'combined',
're': [
'Production Companies</b><ul>(.*?)</ul>',
'<a href="/company/.*?/">(.*?)</a>'
],
'type': 'list'
},
'rating': {
'page': 'combined',
're': '<div class="starbar-meta">.*?<b>([\d,.]+?)/10</b>',
'type': 'float'
},
'releasedate': {
'page': 'releaseinfo',
're': [
'<td class="release_date">(.*?)</td>',
strip_tags,
],
'type': 'list'
},
'reviews': {
'page': 'externalreviews',
're': [
'<ol>(.*?)</ol>',
'<li><a href="(http.*?)".*?>(.*?)</a></li>'
],
'type': 'list'
},
'runtime': {
'page': 'combined',
're': '<h5>Runtime:</h5><div class="info-content">.*?([0-9]+ sec|[0-9]+ min).*?</div>',
'type': 'string'
},
'color': {
'page': 'combined',
're': [
'<h5>Color:</h5><div class="info-content">(.*?)</div>',
'<a.*?>(.*?)</a>'
],
'type': 'list'
},
'sound': {
'page': 'combined',
're': [
'<h5>Sound Mix:</h5><div class="info-content">(.*?)</div>',
'<a.*?>(.*?)</a>'
],
'type': 'list'
},
'season': {
'page': 'combined',
're': [
'<h5>Original Air Date:</h5>.*?<div class="info-content">(.*?)</div>',
'\(Season (\d+), Episode \d+\)',
],
'type': 'int'
},
'episode': {
'page': 'combined',
're': [
'<h5>Original Air Date:</h5>.*?<div class="info-content">(.*?)</div>',
'\(Season \d+, Episode (\d+)\)',
],
'type': 'int'
},
'series': {
'page': 'combined',
're': '<h5>TV Series:</h5>.*?<a href="/title/tt(\d{7})',
'type': 'string'
},
'isSeries': {
'page': 'combined',
're': '<span class="tv-extra">(TV series|TV mini-series) ',
'type': 'string'
},
'title': {
'page': 'combined',
're': '<h1>(.*?) <span>',
'type': 'string'
},
'trivia': {
'page': 'trivia',
're': [
'<div class="sodatext">(.*?)<(br|/div)',
lambda data: data[0]
],
'type': 'list',
},
'votes': {
'page': 'combined',
're': '<a href="ratings" class="tn15more">([\d,]*?) votes</a>',
'type': 'string'
},
'writer': {
'page': 'combined',
're': [
lambda data: data.split('Series Crew')[0],
'Writing credits</a>(.*?)</table>',
'<a href="/name/.*?/">(.*?)</a>'
],
'type': 'list'
},
'year': {
'page': 'combined',
're': '="og:title" content="[^"]*?\((\d{4}).*?"',
'type': 'int'
}
}
def read_url(self, url, timeout):
if not url in self._cache:
self._cache[url] = read_url(url, timeout=timeout, unicode=True)
return self._cache[url]
def __init__(self, id, timeout=-1):
#use akas.imdb.com to always get original title:
#http://www.imdb.com/help/show_leaf?titlelanguagedisplay
self.baseUrl = "http://akas.imdb.com/title/tt%s/" % id
super(Imdb, self).__init__(timeout)
url = self.baseUrl + 'combined'
page = self.read_url(url, timeout=-1)
if '<title>IMDb: Page not found</title>' in page \
or 'The requested URL was not found on our server.' in page:
return
if "<p>We're sorry, something went wrong.</p>" in page:
time.sleep(1)
super(Imdb, self).__init__(0)
if 'alternativeTitles' in self:
if len(self['alternativeTitles']) == 2 and \
isinstance(self['alternativeTitles'][0], string_types):
self['alternativeTitles'] = [self['alternativeTitles']]
#normalize country names
if 'country' in self:
self['country'] = [normalize_country_name(c) or c for c in self['country']]
if 'sound' in self:
self['sound'] = list(set(self['sound']))
types = {}
stop_words = [
'alternative spelling',
'alternative title',
'alternative transliteration',
'closing credits title',
'complete title',
'IMAX version',
'informal short title',
'International (Spanish title)',
'Japan (imdb display title)',
'longer version',
'new title',
'original subtitled version',
'pre-release title',
'promotional abbreviation',
'recut version',
'reissue title',
'restored version',
'script title',
'short title',
'(subtitle)',
'TV title',
'working title',
'World-wide (Spanish title)',
]
        #ignore 'Japan (English title)' entries
        #for movies that are not exclusively from Japan
if ['Japan'] != self.get('country', []):
stop_words += [
'Japan (English title)'
]
for t in self.get('alternativeTitles', []):
for type in t[0].split('/'):
type = type.strip()
stop_word = False
for key in stop_words:
if key in type:
stop_word = True
break
if not stop_word:
if not type in types:
types[type] = []
types[type].append(t[1])
titles = {}
for type in types:
for title in types[type]:
if not title in titles:
titles[title] = []
titles[title].append(type)
def select_title(type):
title = types[type][0]
count = 0
if len(types[type]) > 1:
for t in types[type]:
if len(titles[t]) > count:
count = len(titles[t])
title = t
return title
        types = {type: select_title(type) for type in types}
regexps = [
"^.+ \(imdb display title\) \(English title\)$",
"^USA \(imdb display title\)$",
"^International \(English title\)$",
"^International \(English title\)$",
"^UK \(imdb display title\)$",
"^International \(.+\) \(English title\)$",
"^World-wide \(English title\)$",
]
if 'Hong Kong' in self.get('country', []):
regexps += [
"Hong Kong \(English title\)"
]
english_countries = (
'USA', 'UK', 'United States', 'United Kingdom',
'Australia', 'New Zealand'
)
        if not any(c in english_countries for c in self.get('country', [])):
regexps += [
"^[^(]+ \(English title\)$",
"^.+ \(.+\) \(English title\)$",
"^USA$",
"^UK$",
"^USA \(.+\)$",
"^UK \(.+\)$",
"^Australia \(.+\)$",
"World-wide \(English title\)",
"\(literal English title\)",
"^International \(.+ title\)$",
"^International \(.+\) \(.+ title\)$",
]
for regexp in regexps:
for type in types:
if re.compile(regexp).findall(type):
#print types[type], type
self['internationalTitle'] = types[type]
break
if 'internationalTitle' in self:
break
def cleanup_title(title):
if title.startswith('"') and title.endswith('"'):
title = title[1:-1]
if title.startswith("'") and title.endswith("'"):
title = title[1:-1]
title = re.sub('\(\#[.\d]+\)', '', title)
return title.strip()
for t in ('title', 'internationalTitle'):
if t in self:
self[t] = cleanup_title(self[t])
if 'internationalTitle' in self and \
self.get('title', '').lower() == self['internationalTitle'].lower():
del self['internationalTitle']
if 'alternativeTitles' in self:
alt = {}
for t in self['alternativeTitles']:
title = cleanup_title(t[1])
if title not in (self.get('title'), self.get('internationalTitle')):
if title not in alt:
alt[title] = []
for c in t[0].split('/'):
if not '(working title)' in c:
c = c.replace('International', '').replace('World-wide', '').split('(')[0].strip()
if c:
alt[title].append(c)
self['alternativeTitles'] = []
for t in sorted(alt, key=lambda a: sorted(alt[a])):
if alt[t]:
countries = sorted([normalize_country_name(c) or c for c in alt[t]])
self['alternativeTitles'].append((t, countries))
if not self['alternativeTitles']:
del self['alternativeTitles']
if 'internationalTitle' in self:
self['originalTitle'] = self['title']
self['title'] = self.pop('internationalTitle')
if 'runtime' in self and self['runtime']:
if 'min' in self['runtime']: base=60
else: base=1
self['runtime'] = int(find_re(self['runtime'], '([0-9]+)')) * base
if 'runtime' in self and not self['runtime']:
del self['runtime']
if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
if 'cast' in self:
if isinstance(self['cast'][0], string_types):
self['cast'] = [self['cast']]
self['actor'] = [c[0] for c in self['cast']]
def cleanup_character(c):
c = c.replace('(uncredited)', '').strip()
return c
self['cast'] = [{'actor': x[0], 'character': cleanup_character(x[1])}
for x in self['cast']]
if 'connections' in self:
cc={}
if len(self['connections']) == 3 and isinstance(self['connections'][0], string_types):
self['connections'] = [self['connections']]
for rel, data, _ in self['connections']:
if isinstance(rel, bytes):
rel = rel.decode('utf-8')
#cc[rel] = re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>').findall(data)
def get_conn(c):
r = {
'id': c[0],
'title': cleanup_title(c[1]),
}
description = c[2].split('<br />')
if len(description) == 2 and description[-1].strip() != '-':
r['description'] = description[-1].strip()
return r
cc[rel] = list(map(get_conn, re.compile('<a href="/title/tt(\d{7})/">(.*?)</a>(.*?)<\/div', re.DOTALL).findall(data)))
self['connections'] = cc
for key in ('country', 'genre'):
if key in self:
self[key] = list(filter(lambda x: x.lower() != 'home', self[key]))
#0092999
if '_director' in self:
if 'series' in self or 'isSeries' in self:
self['creator'] = self.pop('_director')
else:
del self['_director']
if 'isSeries' in self:
del self['isSeries']
self['isSeries'] = True
if 'episodeTitle' in self:
self['episodeTitle'] = re.sub('Episode \#\d+\.\d+', '', self['episodeTitle'])
if 'series' in self:
series = Imdb(self['series'], timeout=timeout)
self['seriesTitle'] = series['title']
if 'episodeTitle' in self:
self['seriesTitle'] = series['title']
if 'season' in self and 'episode' in self:
self['title'] = "%s (S%02dE%02d) %s" % (
self['seriesTitle'], self['season'], self['episode'], self['episodeTitle'])
else:
self['title'] = "%s (S01) %s" % (self['seriesTitle'], self['episodeTitle'])
self['season'] = 1
self['title'] = self['title'].strip()
if 'director' in self:
self['episodeDirector'] = self['director']
if not 'creator' in series and 'director' in series:
series['creator'] = series['director']
if len(series['creator']) > 10:
series['creator'] = series['director'][:1]
for key in ['creator', 'country']:
if key in series:
self[key] = series[key]
if 'year' in series:
self['seriesYear'] = series['year']
if not 'year' in self:
self['year'] = series['year']
if 'year' in self:
self['episodeYear'] = self['year']
if 'creator' in self:
self['seriesDirector'] = self['creator']
if 'originalTitle' in self:
del self['originalTitle']
else:
for key in ('seriesTitle', 'episodeTitle', 'season', 'episode'):
if key in self:
del self[key]
if 'creator' in self:
if 'director' in self:
self['episodeDirector'] = self['director']
self['director'] = self['creator']
#make lists unique but keep order
for key in ('director', 'language'):
if key in self:
self[key] = [x for i,x in enumerate(self[key])
if x not in self[key][i+1:]]
for key in ('actor', 'writer', 'producer', 'editor', 'composer'):
if key in self:
if isinstance(self[key][0], list):
self[key] = [i[0] for i in self[key] if i]
self[key] = sorted(list(set(self[key])), key=lambda a: self[key].index(a))
if 'budget' in self and 'gross' in self:
self['profit'] = self['gross'] - self['budget']
if 'releasedate' in self:
def parse_date(d):
try:
d = datetime.strptime(d, '%d %B %Y')
except:
try:
d = datetime.strptime(d, '%B %Y')
except:
return 'x'
return '%d-%02d-%02d' % (d.year, d.month, d.day)
self['releasedate'] = min([
parse_date(d) for d in self['releasedate']
])
if self['releasedate'] == 'x':
del self['releasedate']
if 'summary' in self:
if isinstance(self['summary'], list):
self['summary'] = self['summary'][0]
self['summary'] = self['summary'].split('</p')[0].strip()
class ImdbCombined(Imdb):
def __init__(self, id, timeout=-1):
_regex = {}
for key in self.regex:
if self.regex[key]['page'] in ('combined', 'releaseinfo'):
_regex[key] = self.regex[key]
self.regex = _regex
super(ImdbCombined, self).__init__(id, timeout)
def get_movie_by_title(title, timeout=-1):
'''
    This only works for exact title matches from the data dump.
    Titles are usually in one of these formats:
        Title (Year)
        "Series Title" (Year) {(#Season.Episode)}
        "Series Title" (Year) {Episode Title (#Season.Episode)}
    If there is more than one film with that title for the year:
        Title (Year/I)
>>> get_movie_by_title(u'"Father Knows Best" (1954) {(#5.34)}')
u'1602860'
>>> get_movie_by_title(u'The Matrix (1999)')
u'0133093'
>>> get_movie_by_title(u'Little Egypt (1951)')
u'0043748'
>>> get_movie_by_title(u'Little Egypt (1897/I)')
u'0214882'
>>> get_movie_by_title(u'Little Egypt')
None
>>> get_movie_by_title(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
u'0866567'
'''
params = {'s':'tt','q': title}
if not isinstance(title, bytes):
try:
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
params['q'] = params['q'].encode('utf-8')
    params = urllib.parse.urlencode(params)
url = "http://akas.imdb.com/find?" + params
data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
results = re.compile(r).findall(data)
if results:
return results[0]
return None
def get_movie_id(title, director='', year='', timeout=-1):
'''
>>> get_movie_id('The Matrix')
u'0133093'
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
u'0060304'
>>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
u'0060304'
>>> get_movie_id(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
u'0179214'
>>> get_movie_id(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
u'0179214'
'''
imdbId = {
(u'Le jour se l\xe8ve', u'Marcel Carn\xe9'): '0031514',
(u'Wings', u'Larisa Shepitko'): '0061196',
(u'The Ascent', u'Larisa Shepitko'): '0075404',
(u'Fanny and Alexander', u'Ingmar Bergman'): '0083922',
(u'Torment', u'Alf Sj\xf6berg'): '0036914',
(u'Crisis', u'Ingmar Bergman'): '0038675',
(u'To Joy', u'Ingmar Bergman'): '0043048',
(u'Humain, trop humain', u'Louis Malle'): '0071635',
(u'Place de la R\xe9publique', u'Louis Malle'): '0071999',
(u'God\u2019s Country', u'Louis Malle'): '0091125',
(u'Flunky, Work Hard', u'Mikio Naruse'): '0022036',
(u'The Courtesans of Bombay', u'Richard Robbins') : '0163591',
(u'Je tu il elle', u'Chantal Akerman') : '0071690',
(u'Hotel Monterey', u'Chantal Akerman') : '0068725',
        (u'No Blood Relation', u'Mikio Naruse') : '0023261',
(u'Apart from You', u'Mikio Naruse') : '0024214',
(u'Every-Night Dreams', u'Mikio Naruse') : '0024793',
(u'Street Without End', u'Mikio Naruse') : '0025338',
(u'Sisters of the Gion', u'Kenji Mizoguchi') : '0027672',
(u'Osaka Elegy', u'Kenji Mizoguchi') : '0028021',
(u'Blaise Pascal', u'Roberto Rossellini') : '0066839',
(u'Japanese Girls at the Harbor', u'Hiroshi Shimizu') : '0160535',
(u'The Private Life of Don Juan', u'Alexander Korda') : '0025681',
(u'Last Holiday', u'Henry Cass') : '0042665',
(u'A Colt Is My Passport', u'Takashi Nomura') : '0330536',
(u'Androcles and the Lion', u'Chester Erskine') : '0044355',
(u'Major Barbara', u'Gabriel Pascal') : '0033868',
(u'Come On Children', u'Allan King') : '0269104',
(u'Jimi Plays Monterey & Shake! Otis at Monterey', u'D. A. Pennebaker and Chris Hegedus') : '',
(u'Martha Graham: Dance on Film', u'Nathan Kroll') : '',
(u'Carmen', u'Carlos Saura'): '0085297',
(u'The Story of a Cheat', u'Sacha Guitry'): '0028201',
(u'Weekend', 'Andrew Haigh'): '1714210',
}.get((title, director), None)
if imdbId:
return imdbId
params = {'s':'tt','q': title}
if director:
params['q'] = u'"%s" %s' % (title, director)
if year:
params['q'] = u'"%s (%s)" %s' % (title, year, director)
google_query = "site:imdb.com %s" % params['q']
if not isinstance(params['q'], bytes):
try:
params['q'] = unicodedata.normalize('NFKC', params['q']).encode('latin-1')
except:
params['q'] = params['q'].encode('utf-8')
    params = urllib.parse.urlencode(params)
url = "http://akas.imdb.com/find?" + params
#print url
data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
results = re.compile(r).findall(data)
if results:
return results[0]
#otherwise get first result
r = '<td valign="top">.*?<a href="/title/tt(\d{7})/"'
results = re.compile(r).findall(data)
if results:
return results[0]
#print (title, director), ": '',"
#print google_query
#results = google.find(google_query, timeout=timeout)
results = duckduckgo.find(google_query, timeout=timeout)
if results:
for r in results[:2]:
imdbId = find_re(r[1], 'title/tt(\d{7})')
if imdbId:
return imdbId
#or nothing
return ''
def get_movie_poster(imdbId):
'''
>>> get_movie_poster('0133093')
'http://ia.media-imdb.com/images/M/MV5BMjEzNjg1NTg2NV5BMl5BanBnXkFtZTYwNjY3MzQ5._V1._SX338_SY475_.jpg'
>>> get_movie_poster('0994352')
'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
'''
info = ImdbCombined(imdbId)
if 'posterId' in info:
url = "http://www.imdb.com/media/rm%s/tt%s" % (info['posterId'], imdbId)
data = read_url(url).decode('utf-8', 'ignore')
poster = find_re(data, 'img.*?id="primary-img".*?src="(.*?)"')
return poster
elif 'series' in info:
return get_movie_poster(info['series'])
return ''
def get_episodes(imdbId, season=None):
episodes = {}
url = 'http://www.imdb.com/title/tt%s/episodes' % imdbId
if season:
url += '?season=%d' % season
        data = cache.read_url(url, unicode=True)
for e in re.compile('<div data-const="tt(\d{7})".*?>.*?<div>S(\d+), Ep(\d+)<\/div>\n<\/div>', re.DOTALL).findall(data):
episodes['S%02dE%02d' %(int(e[1]), int(e[2]))] = e[0]
else:
        data = cache.read_url(url, unicode=True)
match = re.compile('<strong>Season (\d+)</strong>').findall(data)
if match:
for season in range(1, int(match[0]) + 1):
episodes.update(get_episodes(imdbId, season))
return episodes
def max_votes():
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
    data = cache.read_url(url, unicode=True)
votes = max([int(v.replace(',', ''))
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
return votes
def guess(title, director='', timeout=-1):
return get_movie_id(title, director, timeout=timeout)
if __name__ == "__main__":
import json
print(json.dumps(Imdb('0306414'), indent=2))
    #print(json.dumps(Imdb('0133093'), indent=2))

View file

@@ -0,0 +1,300 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
import re
from ox.cache import read_url
from ox.html import strip_tags
from ox.text import find_re
def get_data(id):
'''
>>> get_data('1991/silence_of_the_lambs')['imdbId']
u'0102926'
>>> get_data('1991/silence_of_the_lambs')['posters'][0]
u'http://www.impawards.com/1991/posters/silence_of_the_lambs_ver1.jpg'
>>> get_data('1991/silence_of_the_lambs')['url']
u'http://www.impawards.com/1991/silence_of_the_lambs_ver1.html'
'''
data = {
'url': get_url(id)
}
html = read_url(data['url'], unicode=True)
data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
if not data['imdbId']:
data['imdbId'] = _id_map.get(id, '')
data['title'] = strip_tags(find_re(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
data['year'] = find_re(html, '\(<a href="alpha1.html">(.*?)</a>\)')
data['posters'] = []
poster = find_re(html, '<img src="(posters.*?)"')
if poster:
poster = 'http://www.impawards.com/%s/%s' % (data['year'], poster)
data['posters'].append(poster)
results = re.compile('<a href = (%s.*?html)' % id[5:], re.DOTALL).findall(html)
for result in results:
result = result.replace('_xlg.html', '.html')
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = read_url(url, unicode=True)
result = find_re(html, '<a href = (\w*?_xlg.html)')
if result:
url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = read_url(url, unicode=True)
poster = 'http://www.impawards.com/%s/%s' % (data['year'], find_re(html, '<img SRC="(.*?)"'))
else:
poster = 'http://www.impawards.com/%s/%s' % (data['year'], find_re(html, '<img src="(posters.*?)"'))
data['posters'].append(poster)
return data
def get_id(url):
split = url.split('/')
year = split[3]
split = split[4][:-5].split('_')
if split[-1] == 'xlg':
split.pop()
if find_re(split[-1], 'ver\d+$'):
split.pop()
id = '%s/%s' % (year, '_'.join(split))
return id
def get_ids(page=None):
ids = []
if page:
html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout = -1, unicode=True)
results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
for result in results:
url = 'http://impawards.com/%s' % result
ids.append(get_id(url))
return set(ids)
#get all
html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
pages = int(find_re(html, '<a href= page(.*?).html>')) + 1
for page in range(pages, 0, -1):
for id in get_ids(page):
if not id in ids:
ids.append(id)
return ids
def get_url(id):
url = u"http://www.impawards.com/%s.html" % id
html = read_url(url, unicode=True)
if find_re(html, "No Movie Posters on This Page"):
url = u"http://www.impawards.com/%s_ver1.html" % id
return url
_id_map = {
'1933/forty_second_street': '0024034',
'1933/tarzan_the_fearless': '0024645',
'1935/informer': '0026529',
    '1935/thirty_nine_steps': '0026029',
'1935/top_hat': '0027125',
'1938/charlie_chaplin_cavalcade': '0284687',
    '1943/falcon_and_the_co-eds': '0035855',
'1969/angel_angel_down_we_go': '0065602',
'1970/crimson_altar': '0062833',
'1975/man_who_would_be_king_ver1': '0073341',
'1975/picnic_at_hanging_rock_ver1': '0073540',
'1979/electric_horseman_ver1': '0079100',
'1980/caligula_ver1': '0080491',
'1980/hollywood_knights_ver1': '0080881',
'1981/history_of_the_world_part_i': '0082517',
'1981/sea_wolves': '0081470',
'1983/krull_ver1': '0085811',
'1985/warriors_of_the_wind': '0087544',
'1989/friday_the_thirteenth_part_viii_ver1': '0097388',
'1989/high_hopes': '0095302',
'1989/millenium': '0097883',
'1989/story_of_women': '0096336',
'1990/edward_scissorhands_ver1': '0099487',
'1991/freddys_dead_ver1': '0101917',
'1993/robocop_three_ver1': '0107978',
'1993/waynes_world_two_ver1': '0108525',
'1994/above_the_rim_ver1': '0109035',
'1994/helas_pour_moi': '0107175',
'1994/house_of_the_spirits_ver1': '0107151',
'1994/i_dont_want_to_talk_about_it': '0106678',
'1994/in_custody': '0107199',
'1994/ladybird_ladybird': '0110296',
'1994/leon_the_pig_farmer': '0104710',
'1994/love_after_love': '0103710',
'1994/l_six_two_seven': '0104658',
'1994/martin_lawrence_you_so_crazy_ver1': '0111804',
'1994/savage_nights': '0105032',
'1994/sex_drugs_and_democracy': '0111135',
'1995/bye_bye_love': '0112606',
'1995/cold_comfort_farm': '0112701',
'1995/gumby_the_movie': '0113234',
'1995/les_miserables': '0113828',
'1995/mystery_of_rampo': '0110943',
'1995/pharaohs_army': '0114122',
'1995/pure_formality': '0110917',
'1995/quick_and_the_dead_ver1': '0114214',
'1995/reflections_in_the_dark': '0110956',
'1995/safe_ver1': '0114323',
'1995/search_and_destroy': '0114371',
'1995/secret_of_roan_inish_ver1': '0111112',
'1995/underneath': '0114788',
'1996/ghost_in_the_shell': '0113568',
'1996/hate': '0113247',
'1996/horseman_on_the_roof': '0113362',
'1996/kids_in_the_hall_brain_candy': '0116768',
'1996/maybe_maybe_not': '0109255',
'1996/prisoner_of_the_mountains': '0116754',
'1997/fifth_element_ver1': '0119116',
'1997/fools_rush_in_ver1': '0119141',
'1997/gi_jane_ver1': '0119173',
'1997/happy_together_ver1': '0118845',
'1997/lilies': '0116882',
'1997/mouth_to_mouth': '0112546',
'1997/mr_nice_guy': '0117786',
'1997/nenette_and_boni': '0117221',
'1997/paperback_romance': '0110405',
'1997/second_jungle_book': '0120087',
'1997/single_girl': '0113057',
'1997/super_speedway': '0120245',
'1997/temptress_moon': '0116295',
'1998/alarmist': '0119534',
'1998/barneys_great_adventure_the_movie': '0120598',
'1998/bulworth_ver1': '0118798',
'1998/celebration': '0154420',
'1998/east_palace_west_palace': '0119007',
'1998/hurricane_streets': '0119338',
'1998/i_married_a_strange_person': '0119346',
'1998/inheritors': '0141824',
'1998/killing_time': '0140312',
'1998/live_flesh': '0118819',
'1998/music_from_another_room': '0119734',
'1998/post_coitum_ver1': '0119923',
'1998/steam_the_turkish_bath': '0119248',
'1998/velocity_of_gary': '0120878',
'1999/after_life': '0165078',
'1999/emperor_and_the_assassin': '0162866',
'1999/fantasia_two_thousand': '0120910',
'1999/get_bruce': '0184510',
'1999/god_said_ha': '0119207',
'1999/jawbreaker': '0155776',
'1999/jeanne_and_the_perfect_guy': '0123923',
'1999/king_and_i': '0160429',
'1999/lovers_of_the_arctic_circle': '0133363',
'1999/plunkett_and_macleane': '0134033',
'1999/pokemon_the_first_movie': '0190641',
'1999/school_of_flesh': '0157208',
'1999/splendor': '0127296',
'1999/stranger_in_the_kingdom': '0126680',
'1999/train_of_life': '0170705',
'1999/twice_upon_a_yesterday': '0138590',
'1999/whiteboys': '0178988',
'1999/wildfire': '0194544',
'1999/windhorse': '0169388',
'2000/claim': '0218378',
'2000/color_of_paradise': '0191043',
'2000/criminal_lovers': '0205735',
'2000/everlasting_piece': '0218182',
'2000/girl_on_the_bridge_ver1': '0144201',
'2000/godzilla_two_thousand': '0188640',
'2000/goya_in_bordeaux': '0210717',
'2000/mad_about_mambo': '0156757',
'2000/picking_up_the_pieces': '0192455',
'2000/pokemon_the_movie_2000': '0257001',
'2000/seven_days_to_live': '0221928',
'2000/south_of_heaven_west_of_hell': '0179473',
'2000/suzhou_river': '0234837',
'2000/time_for_drunken_horses': '0259072',
'2000/venus_beauty_institute': '0174330',
'2001/circle': '0368646',
'2001/devils_backbone': '0256009',
'2001/kill_me_later': '0243595',
'2001/king_is_dancing': '0244173',
'2001/learning_curve': '0219126',
'2001/marco_polo__return_to_xanadu_ver1': '0296074',
'2001/me_you_them': '0244504',
'2001/our_lady_of_the_assassins': '0250809',
'2001/pinero': '0261066',
'2001/pokemon_three_the_movie_ver1': '0266860',
'2001/scratch': '0143861',
'2001/vampire_hunter_d_bloodlust_ver1': '0216651',
'2002/el_bosque_animado': '0310790',
'2002/fifty_first_state': '0227984',
'2002/les_destinees': '0216689',
'2002/sons_room': '0208990',
'2003/open_hearts': '0315543',
'2003/tulse_luper_suitcases': '0307596',
'2003/valentin': '0296915',
'2004/if_only_ver1': '0332136',
'2004/wondrous_oblivion': '0334725',
'2005/wu_ji': '0417976',
'2006/golden_door': '0465188',
'2006/kin': '1091189',
'2007/revenge_of_the_nerds': '0088000',
'2008/bad_batch': '1605644',
'2008/mercedes': '1368083',
'2008/spirit': '0831887',
'2009/dead_air': '0993841',
'2009/edge_of_love': '0819714',
    '2009/fuel': '1072437',
'2009/one_good_man': '1239357',
'2009/st_trinians': '1210106',
'2009/surveillance': '0409345',
'2009/taken': '0936501',
'2009/vaml': '1610453',
'2010/adopting_haiti': '1764164',
'2010/afterlife': '0838247',
'2010/agora': '1186830',
'2010/athlete': '1356996',
'2010/beneath_the_blue': '1222698',
'2010/bitch_slap': '1212974',
'2010/black_waters_of_echos_pond': '0960066',
'2010/case_thirty_nine': '0795351',
'2010/finite_and_infinite_games': '1772268',
'2010/hole': '1085779',
'2010/jolene': '0867334',
'2010/lake_mungo': '0816556',
'2010/last_day_of_summer': '1242544',
'2010/leaves_of_grass': '1151359',
'2010/life_of_lemon': '1466057',
'2010/man_in_the_maze': '1721692',
'2010/mr_immortality_the_life_and_times_of_twista': '1711017',
'2010/paper_man': '0437405',
'2010/perfect_game': '0473102',
'2010/red_baron': '0365675',
'2010/satin': '0433397',
'2010/shutter_island': '1130884',
'2010/strange_powers': '1534075',
'2010/suicidegirls_must_die': '1584733',
'2010/veronika_decides_to_die': '1068678',
'2010/witchblade': '0494292',
'2010/youth_in_revolt': '0403702',
'2011/beastly': '1152398',
'2011/burning_palms': '1283887',
'2011/cabin_in_the_woods': '1259521',
'2011/conan': '0816462',
'2011/courageous': '1630036',
'2011/cruces_divided_two': '1698645',
'2011/green_with_envy': '1204342',
'2011/happythankyoumoreplease': '1481572',
'2011/homework': '1645080',
'2011/i_got_next': '1915570',
'2011/lebanon_pa': '1290082',
'2011/money_pet': '1965198',
'2011/my_suicide': '0492896',
'2011/priest': '0822847',
'2011/prowl': '1559033',
'2011/red_sonja': '0800175',
'2011/season_of_the_witch': '0479997',
'2011/stay_cool': '1235807',
'2011/sympathy_for_delicious': '1270277',
'2011/trust': '1529572',
'2011/undefeated': '1961604',
'2011/vanishing_on_seventh_street': '1452628',
'2011/where_is_robert_fisher': '2042712',
'2011/yellowbrickroad': '1398428',
'2012/haywire': '1506999',
'2012/last_call_at_the_oasis': '2043900',
}
if __name__ == '__main__':
ids = get_ids()
    print(sorted(ids), len(ids))

View file

@@ -0,0 +1,187 @@
# vi:si:et:sw=4:sts=4:ts=4
# encoding: utf-8
import re
from six.moves import urllib
from ox.cache import read_url
from ox.html import decode_html, strip_tags
from ox.text import find_re
from ox.text import find_string
# to sniff itunes traffic, use something like
# sudo tcpdump -i en1 -Avs 8192 host appleglobal.112.2o7.net
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=music&songTerm=&genreIndex=1&flavor=0&mediaType=2&composerTerm=&allArtistNames=Arcadia&ringtone=0&searchButton=submit
# http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?media=movie&movieTerm=The%20Matrix&descriptionTerm=&ratingIndex=1&mediaType=3&directorProducerName=Andy%20Wachowski&flavor=0&releaseYearTerm=1999&closedCaption=0&actorTerm=&searchButton=submit
ITUNES_HEADERS = {
'X-Apple-Tz': '0',
'X-Apple-Storefront': '143441-1',
'User-Agent': 'iTunes/7.6.2 (Macintosh; U; Intel Mac OS X 10.5.2)',
'Accept-Language': 'en-us, en;q=0.50',
'Accept-Encoding': 'gzip',
'Connection': 'close',
}
def compose_url(request, parameters):
if request == 'advancedSearch':
url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
if parameters['media'] == 'music':
        url += urlencode({
'albumTerm': parameters['title'],
'allArtistNames': parameters['artist'],
'composerTerm': '',
'flavor': 0,
'genreIndex': 1,
'media': 'music',
'mediaType': 2,
'ringtone': 0,
'searchButton': 'submit',
'songTerm': ''
})
elif parameters['media'] == 'movie':
        url += urlencode({
'actorTerm': '',
'closedCaption': 0,
'descriptionTerm': '',
'directorProducerName': parameters['director'],
'flavor': 0,
'media': 'movie',
'mediaType': 3,
'movieTerm': parameters['title'],
'ratingIndex': 1,
'releaseYearTerm': '',
'searchButton': 'submit'
})
elif request == 'viewAlbum':
url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewAlbum?id=%s' % parameters['id']
elif request == 'viewMovie':
url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
return url
def parse_xml_dict(xml):
values = {}
strings = xml.split('<key>')
for string in strings:
if string.find('</key>') != -1:
key = find_re(string, '(.*?)</key>')
type = find_re(string, '</key><(.*?)>')
if type == 'true/':
value = True
else:
value = find_re(string, '<%s>(.*?)</%s>' % (type, type))
if type == 'integer':
value = int(value)
elif type == 'string':
value = decode_html(value)
values[key] = value
return values
def parse_cast(xml, title):
    cast = []
    try:
        strings = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
        strings.pop()
        for string in strings:
            cast.append(find_re(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        return cast
    except:
        return cast
def parse_movies(xml, title):
    movies = []
    try:
        strings = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
        strings.pop()
        for string in strings:
            movies.append({
                'id': find_re(string, 'viewMovie\?id=(.*?)&'),
                'title': find_re(string, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
            })
        return movies
    except:
        return movies
class ItunesAlbum:
def __init__(self, id = '', title = '', artist = ''):
self.id = id
self.title = title
self.artist = artist
if not id:
self.id = self.get_id()
def get_id(self):
url = compose_url('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
xml = read_url(url, headers = ITUNES_HEADERS)
id = find_re(xml, 'viewAlbum\?id=(.*?)&')
return id
def get_data(self):
data = {'id': self.id}
url = compose_url('viewAlbum', {'id': self.id})
xml = read_url(url, None, ITUNES_HEADERS)
data['albumName'] = find_re(xml, '<B>(.*?)</B>')
data['artistName'] = find_re(xml, '<b>(.*?)</b>')
data['coverUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
data['genre'] = find_re(xml, 'Genre:(.*?)<')
data['releaseDate'] = find_re(xml, 'Released(.*?)<')
data['review'] = strip_tags(find_re(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
data['tracks'] = []
strings = find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
for string in strings:
data['tracks'].append(parse_xml_dict(string))
data['type'] = find_re(xml, '<key>listType</key><string>(.*?)<')
return data
class ItunesMovie:
def __init__(self, id = '', title = '', director = ''):
self.id = id
self.title = title
self.director = director
if not id:
self.id = self.get_id()
def get_id(self):
url = compose_url('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
xml = read_url(url, headers = ITUNES_HEADERS)
id = find_re(xml, 'viewMovie\?id=(.*?)&')
return id
def get_data(self):
data = {'id': self.id}
url = compose_url('viewMovie', {'id': self.id})
xml = read_url(url, None, ITUNES_HEADERS)
data['actors'] = parse_cast(xml, 'actors')
string = find_re(xml, 'Average Rating:(.*?)</HBoxView>')
data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
data['directors'] = parse_cast(xml, 'directors')
data['format'] = find_re(xml, 'Format:(.*?)<')
data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))
data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
data['producers'] = parse_cast(xml, 'producers')
data['rated'] = find_re(xml, 'Rated(.*?)<')
data['relatedMovies'] = parse_movies(xml, 'related movies')
data['releaseDate'] = find_re(xml, 'Released(.*?)<')
data['runTime'] = find_re(xml, 'Run Time:(.*?)<')
data['screenwriters'] = parse_cast(xml, 'screenwriters')
data['soundtrackId'] = find_re(xml, 'viewAlbum\?id=(.*?)&')
data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')
return data
if __name__ == '__main__':
from ox.utils import json
    data = ItunesAlbum(title='So Red the Rose', artist='Arcadia').get_data()
    print(json.dumps(data, sort_keys=True, indent=4))
    data = ItunesMovie(title='The Matrix', director='Wachowski').get_data()
    print(json.dumps(data, sort_keys=True, indent=4))
    for v in data['relatedMovies']:
        data = ItunesMovie(id=v['id']).get_data()
        print(json.dumps(data, sort_keys=True, indent=4))
    data = ItunesMovie(id='272960052').get_data()
    print(json.dumps(data, sort_keys=True, indent=4))

View file

@ -0,0 +1,42 @@
from ox.cache import read_url
from ox import find_re, strip_tags
import re
base = 'http://www.lookupbyisbn.com'
def get_data(isbn):
r = {}
url = '%s/Search/Book/%s/1' % (base, isbn)
data = read_url(url).decode('utf-8')
m = re.compile('href="(/Lookup/Book/[^"]+?)"').findall(data)
if m:
ids = m[0].split('/')
r['isbn'] = ids[-2]
r['asin'] = ids[-3]
url = '%s%s' % (base, m[0])
data = read_url(url).decode('utf-8')
r["title"] = find_re(data, "<h2>(.*?)</h2>")
keys = {
'author': 'Author(s)',
'publisher': 'Publisher',
'date': 'Publication date',
'edition': 'Edition',
'binding': 'Binding',
'volume': 'Volume(s)',
'pages': 'Pages',
}
for key in keys:
r[key] = find_re(data, '<span class="title">%s:</span>(.*?)</li>'% re.escape(keys[key]))
if r[key] == '--':
r[key] = ''
if key == 'pages' and r[key]:
r[key] = int(r[key])
desc = find_re(data, '<h2>Description:<\/h2>(.*?)<div ')
desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
r['description'] = strip_tags(desc).strip()
if r['description'] == u'Description of this item is not available at this time.':
r['description'] = ''
r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
return r
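# A minimal usage sketch; the ISBN below is an arbitrary example, and the
# fields returned depend on what lookupbyisbn.com knows about that edition.
if __name__ == '__main__':
    from ox.utils import json
    print(json.dumps(get_data('0262681374'), sort_keys=True, indent=4))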

View file

@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from ox.cache import read_url
from ox.html import decode_html
from ox.text import find_re
def get_lyrics(title, artist):
html = read_url('http://lyricsfly.com/api/')
key = find_re(html, '<font color=green><b>(.*?)</b></font>')
url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
xml = read_url(url)
lyrics = find_re(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
lyrics = lyrics.replace('\n', '').replace('\r', '')
lyrics = lyrics.replace('[br]', '\n').strip()
    lyrics = lyrics.replace('\n\n\n', '\n\n')
lyrics = decode_html(lyrics.replace('&amp;', '&'))
return lyrics
if __name__ == '__main__':
    print(get_lyrics('Election Day', 'Arcadia'))

View file

@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from urllib.parse import quote
from lxml.html import document_fromstring
from ox.cache import read_url
from ox import find_re, strip_tags
def get_url(id=None, imdb=None):
if imdb:
url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
data = read_url(url)
metacritic_url = find_re(data, '"(http://www.metacritic.com/movie/.*?)"')
return metacritic_url or None
return 'http://www.metacritic.com/movie/%s' % id
def get_id(url):
return url.split('/')[-1]
def get_show_url(title):
title = quote(title)
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
data = read_url(url)
return find_re(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
def get_data(url):
data = read_url(url, unicode=True)
doc = document_fromstring(data)
    score = [s for s in doc.xpath('//span[@class="score_value"]')
             if s.attrib.get('property') == 'v:average']
    if score:
        score = int(score[0].text)
    else:
        score = -1
authors = [a.text
for a in doc.xpath('//div[@class="review_content"]//div[@class="author"]//a')]
sources = [d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="source"]/a')]
reviews = [d.text
for d in doc.xpath('//div[@class="review_content"]//div[@class="review_body"]')]
scores = [int(d.text.strip())
for d in doc.xpath('//div[@class="review_content"]//div[contains(@class, "critscore")]')]
urls = [a.attrib['href']
for a in doc.xpath('//div[@class="review_content"]//a[contains(@class, "external")]')]
metacritics = []
for i in range(len(authors)):
metacritics.append({
'critic': authors[i],
'url': urls[i],
'source': sources[i],
'quote': strip_tags(reviews[i]).strip(),
'score': scores[i],
})
return {
'critics': metacritics,
'id': get_id(url),
'score': score,
'url': url,
}
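if __name__ == '__main__':
    # usage sketch: resolve a Metacritic page from an IMDb id (an arbitrary
    # example) and scrape its critic reviews
    url = get_url(imdb='0133093')
    if url:
        print(get_data(url))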

View file

@ -0,0 +1,121 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import socket
from urllib.parse import quote
from ox.cache import read_url
from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, int_value, normalize_newlines
from ox.normalize import normalize_imdbid
import ox
from .torrent import Torrent
def _parse_results_page(data, max_results=10):
results=[]
regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data):
torrentDate = row[0]
torrentExtra = row[1]
torrentId = row[2]
torrentTitle = decode_html(row[3]).strip()
torrentLink = "http://www.mininova.org/tor/" + torrentId
privateTracker = 'priv.gif' in torrentExtra
if not privateTracker:
results.append((torrentTitle, torrentLink, ''))
return results
def find_movie(query=None, imdb=None, max_results=10):
'''search for torrents on mininova
'''
if imdb:
url = "http://www.mininova.org/imdb/?imdb=%s" % normalize_imdbid(imdb)
else:
url = "http://www.mininova.org/search/%s/seeds" % quote(query)
data = read_url(url, unicode=True)
return _parse_results_page(data, max_results)
def get_id(mininovaId):
    mininovaId = str(mininovaId)
d = find_re(mininovaId, "/(\d+)")
if d:
return d
mininovaId = mininovaId.split('/')
if len(mininovaId) == 1:
return mininovaId[0]
else:
return mininovaId[-1]
def exists(mininovaId):
mininovaId = get_id(mininovaId)
data = ox.net.read_url("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data:
return False
if 'tracker</a> of this torrent requires registration.' in data:
return False
return True
def get_data(mininovaId):
_key_map = {
'by': u'uploader',
}
mininovaId = get_id(mininovaId)
torrent = dict()
torrent[u'id'] = mininovaId
torrent[u'domain'] = 'mininova.org'
torrent[u'comment_link'] = "http://www.mininova.org/tor/%s" % mininovaId
torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
data = read_url(torrent['comment_link'], unicode=True) + read_url(torrent['details_link'], unicode=True)
if '<h1>Torrent not found...</h1>' in data:
return None
for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
key = d[0].lower().strip()
key = _key_map.get(key, key)
value = decode_html(strip_tags(d[1].strip()))
torrent[key] = value
torrent[u'title'] = find_re(data, '<title>(.*?):.*?</title>')
torrent[u'imdbId'] = find_re(data, 'title/tt(\d{7})')
torrent[u'description'] = find_re(data, '<div id="description">(.*?)</div>')
if torrent['description']:
torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip()
t = read_url(torrent[u'torrent_link'])
torrent[u'torrent_info'] = get_torrent_info(t)
return torrent
class Mininova(Torrent):
'''
>>> Mininova('123')
{}
>>> Mininova('1072195')['infohash']
'72dfa59d2338e4a48c78cec9de25964cddb64104'
'''
def __init__(self, mininovaId):
self.data = get_data(mininovaId)
if not self.data:
return
Torrent.__init__(self)
ratio = self.data['share ratio'].split(',')
self['seeder'] = -1
self['leecher'] = -1
if len(ratio) == 2:
val = int_value(ratio[0].replace(',','').strip())
if val:
self['seeder'] = int(val)
val = int_value(ratio[1].replace(',','').strip())
if val:
self['leecher'] = int(val)
val = int_value(self.data['downloads'].replace(',','').strip())
if val:
self['downloaded'] = int(val)
else:
self['downloaded'] = -1
published = self.data['added on']
published = published.split(' +')[0]
self['published'] = datetime.strptime(published, "%a, %d %b %Y %H:%M:%S")

View file

@ -0,0 +1,44 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import read_url
from ox import find_re
def get_data(id):
'''
>>> get_data('0060304')['posters'][0]
u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
>>> get_data('0123456')['posters']
[]
'''
data = {
"url": get_url(id)
}
data["posters"] = get_posters(data["url"])
return data
def get_id(url):
return url.split("/")[-2]
def get_posters(url, group=True, timeout=-1):
posters = []
html = read_url(url, timeout=timeout, unicode=True)
if url in html:
if group:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
for result in results:
posters += get_posters(result, False)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
for result in results:
html = read_url(result, timeout=timeout, unicode=True)
posters.append(find_re(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
return posters
def get_url(id):
return "http://www.movieposterdb.com/movie/%s/" % id
if __name__ == '__main__':
    print(get_data('0060304'))
    print(get_data('0133093'))

View file

@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import feedparser
from ox.cache import read_url
from ox import find_re, strip_tags
from ox.iso import langCode2To3, langTo3Code
def find_subtitles(imdb, parts = 1, language = "eng"):
if len(language) == 2:
language = langCode2To3(language)
elif len(language) != 3:
language = langTo3Code(language)
url = "http://www.opensubtitles.org/en/search/"
if language:
url += "sublanguageid-%s/" % language
url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
data = read_url(url)
if "title>opensubtitles.com - search results</title" in data:
fd = feedparser.parse(data)
opensubtitleId = None
if fd.entries:
link = fd.entries[0]['links'][0]['href']
opensubtitleId = re.compile('subtitles/(.*?)/').findall(link)
if opensubtitleId:
opensubtitleId = opensubtitleId[0]
else:
opensubtitleId = find_re(data, '/en/subtitles/(.*?)/')
return opensubtitleId
def download_subtitle(opensubtitle_id):
srts = {}
data = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
for f in re.compile(reg_exp, re.DOTALL).findall(data):
name = strip_tags(f[1]).split('\n')[0]
url = "http://www.opensubtitles.com%s" % f[0]
srts[name] = read_url(url, unicode=True)
return srts
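if __name__ == '__main__':
    # usage sketch: find English subtitles for an IMDb id (an arbitrary
    # example), then fetch the srt files, keyed by release name
    opensubtitle_id = find_subtitles('0133093')
    if opensubtitle_id:
        srts = download_subtitle(opensubtitle_id)
        print(list(srts.keys()))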

View file

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import ox.cache
def get_poster_url(id):
url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
if ox.cache.exists(url):
return url
return ''

View file

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
from ox.net import read_url
def get_poster_url(id):
url = 'http://piratecinema.org/posters/'
html = read_url(url, unicode=True)
results = re.compile('src="(.+)" title=".+\((\d{7})\)"').findall(html)
for result in results:
if result[1] == id:
return url + result[0]
return ''
if __name__ == '__main__':
print(get_poster_url('0749451'))

View file

@ -0,0 +1,54 @@
# -*- coding: UTF-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox.cache import read_url
from ox import find_re, strip_tags
def get_url(id=None, imdb=None):
    # this would also work but does not cache:
    '''
    from urllib.request import urlopen
    u = urlopen(url)
    return u.url
    '''
if imdb:
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
data = read_url(url)
if "movie_title" in data:
movies = re.compile('(/m/.*?/)').findall(data)
if movies:
return "http://www.rottentomatoes.com" + movies[0]
return None
def get_og(data, key):
return find_re(data, '<meta property="og:%s".*?content="(.*?)"' % key)
def get_data(url):
data = read_url(url)
r = {}
r['title'] = find_re(data, '<h1 class="movie_title">(.*?)</h1>')
if '(' in r['title']:
r['year'] = find_re(r['title'], '\((\d*?)\)')
r['title'] = strip_tags(re.sub('\((\d*?)\)', '', r['title'])).strip()
r['summary'] = strip_tags(find_re(data, '<p id="movieSynopsis" class="movie_synopsis" itemprop="description">(.*?)</p>')).strip()
r['summary'] = r['summary'].replace('\t', ' ').replace('\n', ' ').replace(' ', ' ').replace(' ', ' ')
if not r['summary']:
r['summary'] = get_og(data, 'description')
meter = re.compile('<span id="all-critics-meter" class="meter(.*?)">(.*?)</span>').findall(data)
    meter = [m for m in meter if m[1].isdigit()]
if meter:
r['tomatometer'] = meter[0][1]
r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
r['user_score'] = find_re(data, '<span class="meter popcorn numeric ">(\d+)</span>')
r['user_rating'] = find_re(data, 'Average Rating: ([\d.]+)/5')
poster = get_og(data, 'image')
if poster and not 'poster_default.gif' in poster:
r['posters'] = [poster]
    for key in list(r.keys()):
        if not r[key]:
            del r[key]
return r
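if __name__ == '__main__':
    # usage sketch: resolve the Rotten Tomatoes page for an IMDb id (an
    # arbitrary example); empty fields are stripped from the result above
    url = get_url(imdb='0133093')
    if url:
        print(get_data(url))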

View file

@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from six import string_types
from ..cache import read_url
from .. import decode_html
from ..utils import datetime
def cleanup(key, data, data_type):
if data:
if isinstance(data[0], string_types):
#FIXME: some types need strip_tags
#data = [strip_tags(decode_html(p)).strip() for p in data]
data = [decode_html(p).strip() for p in data]
elif isinstance(data[0], list) or isinstance(data[0], tuple):
data = [cleanup(key, p, data_type) for p in data]
while len(data) == 1 and not isinstance(data, string_types):
data = data[0]
if data_type == 'list' and isinstance(data, string_types):
data = [data, ]
elif data_type != 'list':
data = ''
return data
class SiteParser(dict):
baseUrl = ''
regex = {}
def get_url(self, page):
return "%s%s" % (self.baseUrl, page)
def read_url(self, url, timeout):
if not url in self._cache:
self._cache[url] = read_url(url, timeout=timeout, unicode=True)
return self._cache[url]
def __init__(self, timeout=-1):
self._cache = {}
for key in self.regex:
url = self.get_url(self.regex[key]['page'])
data = self.read_url(url, timeout)
if isinstance(self.regex[key]['re'], string_types):
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
data = cleanup(key, data, self.regex[key]['type'])
elif callable(self.regex[key]['re']):
data = self.regex[key]['re'](data)
else:
for r in self.regex[key]['re']:
if callable(r):
f = r
else:
f = re.compile(r, re.DOTALL).findall
if isinstance(data, string_types):
data = f(data)
else:
data = [f(d) for d in data]
data = cleanup(key, data, self.regex[key]['type'])
def apply_f(f, data):
if data and isinstance(data[0], list):
data = [f(d) for d in data]
else:
data = f(data)
return data
if self.regex[key]['type'] == 'float' and data:
data = apply_f(float, data)
elif self.regex[key]['type'] == 'int' and data:
data = apply_f(int, data)
elif self.regex[key]['type'] == 'date':
parse_date = lambda d: d and datetime.strptime('-'.join(d), '%m-%d-%Y').strftime('%Y-%m-%d')
data = apply_f(parse_date, data)
if data:
self[key] = data
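# A hypothetical subclass, to sketch how the regex spec drives SiteParser:
# for each key, the page at baseUrl + 'page' is fetched (and cached), 're'
# is applied (a single pattern, a callable, or a chain of both), and the
# result is coerced to 'type'. The site and patterns below are invented.
class ExampleParser(SiteParser):
    baseUrl = 'http://example.com/title/tt0133093/'
    regex = {
        'title': {
            'page': 'combined',
            're': '<h1>(.*?)</h1>',
            'type': 'string'
        },
        'genres': {
            'page': 'combined',
            're': '<a href="/genre/.*?">(.*?)</a>',
            'type': 'list'
        },
    }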

View file

@ -0,0 +1,287 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import time
import ox.cache
from ox.html import decode_html, strip_tags
import ox.net
def get_news(year, month, day):
sections = [
'politik', 'wirtschaft', 'panorama', 'sport', 'kultur', 'netzwelt',
'wissenschaft', 'unispiegel', 'schulspiegel', 'reise', 'auto'
]
dt = datetime(year, month, day)
day = int(dt.strftime('%j'))
date = dt.strftime('%d.%m.%Y')
news = []
for section in sections:
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
if date == time.strftime('%d.%m.%Y', time.localtime()):
html = ox.net.read_url(url)
else:
html = ox.cache.read_url(url)
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = strip_tags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try:
description = format_string(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
except:
description = ''
try:
imageUrl = re.compile('<img src="(.*?)"').findall(item)[0]
except:
imageUrl = ''
try:
title = format_string(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
except:
title = ''
if dateString[:10] == date and description and imageUrl and title.find(': ') != -1:
new = {}
if len(dateString) == 10:
new['date'] = '%s-%s-%s 00:00' % (dateString[6:10], dateString[3:5], dateString[:2])
else:
new['date'] = '%s-%s-%s %s:%s' % (dateString[6:10], dateString[3:5], dateString[:2], dateString[12:14], dateString[15:17])
# fix decode_html
# new['description'] = format_string(decode_html(description))
new['description'] = format_string(description)
new['imageUrl'] = imageUrl
new['section'] = format_section(section)
new['title'] = format_string(title)
new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(format_string(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
if new['title1'][-1:] == ':':
new['title1'] = new['title1'][0:-1]
new['title2'] = new['title'][len(new['title1']) + 2:]
new['url'] = re.compile('<a href="(.*?)"').findall(item)[0]
if new['url'][:1] == '/':
new['url'] = 'http://www.spiegel.de' + new['url']
news.append(new)
# print '%s, %s' % (new['section'], dateString)
'''
elif dateString[:10] == date and not description:
print dateString + ' - no description'
elif dateString[:10] == date and not imageUrl:
print dateString + ' - no image'
'''
return news
def split_title(title):
title1 = re.compile('(.*?): ').findall(title)[0]
title2 = re.compile(': (.*?)$').findall(title)[0]
return [title1, title2]
def format_string(string):
string = string.replace('<span class="spOptiBreak"> </span>', '')
string = string.replace('\n', ' ').replace(' ', ' ').strip()
string = string.replace('&amp;', '&').replace('&apos;', '\'').replace('&quot;', '"')
return string
def format_section(string):
return string[:1].upper() + string[1:].replace('spiegel', 'SPIEGEL')
def format_subsection(string):
# SPIEGEL, SPIEGEL special
subsection = {
'abi': 'Abi - und dann?',
'formel1': 'Formel 1',
'jobundberuf': 'Job & Beruf',
'leben': 'Leben U21',
'mensch': 'Mensch & Technik',
'sonst': '',
        'staedte': u'Städte',
'ussports': 'US-Sports',
'wunderbar': 'wunderBAR'
}
    if string in subsection:
        return subsection[string].replace(u'ä', 'ae')
return string[:1].upper() + string[1:]
def get_issue(year, week):
coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
if not ox.net.exists(coverUrl):
return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = []
data = ox.cache.read_url(url)
items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
for item in items:
item = item[1]
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
title = strip_tags(item).strip()
contents.append({'title': title, 'page': page})
pageUrl = {}
pages = page + 2
for page in range(1, pages + 10):
url = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d%04d-205.jpg' % (year, week, year, week, page)
if ox.cache.exists(url):
pageUrl[page] = url
else:
pageUrl[page] = ''
return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}
def archive_issues():
'''
this is just an example of an archiving application
'''
p = {}
import os
from ox.utils import json
import time
archivePath = '/Volumes/Rolux Home/Desktop/Data/spiegel.de/Der Spiegel'
localtime = time.localtime()
year = int(time.strftime('%Y', localtime))
week = int(time.strftime('%W', localtime))
for y in range(year, 1993, -1):
if y == year:
wMax = week + 1
else:
wMax = 53
for w in range(wMax, 0, -1):
            print('get_issue(%d, %d)' % (y, w))
issue = get_issue(y, w)
if issue:
dirname = '%s/%d/%02d' % (archivePath, y, w)
if not os.path.exists(dirname):
os.makedirs(dirname)
filename = '%s/Der Spiegel %d %02d.json' % (dirname, y, w)
if not os.path.exists(filename):
data = json.dumps(issue, ensure_ascii = False)
f = open(filename, 'w')
f.write(data)
f.close()
filename = '%s/Der Spiegel %d %02d.txt' % (dirname, y, w)
if not os.path.exists(filename):
data = []
for item in issue['contents']:
data.append('%3d %s' % (item['page'], item['title']))
data = '\n'.join(data)
f = open(filename, 'w')
f.write(data)
f.close()
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
if not os.path.exists(filename):
data = ox.cache.read_url(issue['coverUrl'])
                    f = open(filename, 'wb')
f.write(data)
f.close()
for page in issue['pageUrl']:
url = issue['pageUrl'][page]
if url:
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
if not os.path.exists(filename):
data = ox.cache.read_url(url)
                            f = open(filename, 'wb')
f.write(data)
f.close()
if not p:
p = {'num': 1, 'sum': issue['pages'], 'min': issue['pages'], 'max': issue['pages']}
else:
p['num'] += 1
p['sum'] += issue['pages']
if issue['pages'] < p['min']:
p['min'] = issue['pages']
if issue['pages'] > p['max']:
p['max'] = issue['pages']
    print(p['min'], p['sum'] // p['num'], p['max'])
def archive_news():
'''
this is just an example of an archiving application
'''
import os
from ox.utils import json
import time
count = {}
colon = []
archivePath = '/Volumes/Rolux Home/Desktop/Data/spiegel.de/Spiegel Online'
days = [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
localtime = time.localtime()
year = int(time.strftime('%Y', localtime))
month = int(time.strftime('%m', localtime))
day = int(time.strftime('%d', localtime)) - 1
for y in range(year, 1999, -1):
if y == year:
mMax = month
else:
mMax = 12
for m in range(mMax, 0, -1):
if y == year and m == month:
dMax = day
            elif m == 2 and y % 4 == 0 and (y % 100 != 0 or y % 400 == 0):
dMax = days[m] + 1
else:
dMax = days[m]
for d in range(dMax, 0, -1):
                print('get_news(%d, %d, %d)' % (y, m, d))
                news = get_news(y, m, d)
for new in news:
dirname = archivePath + '/' + new['date'][0:4] + '/' + new['date'][5:7] + new['date'][8:10] + '/' + new['date'][11:13] + new['date'][14:16]
if not os.path.exists(dirname):
os.makedirs(dirname)
if new['url'][-5:] == '.html':
filename = dirname + '/' + new['url'].split('/')[-1][:-5] + '.json'
else:
filename = dirname + '/' + new['url'] + '.json'
if not os.path.exists(filename) or True:
data = json.dumps(new, ensure_ascii = False)
f = open(filename, 'w')
f.write(data)
f.close()
filename = filename[:-5] + '.txt'
if not os.path.exists(filename) or True:
data = split_title(new['title'])
data.append(new['description'])
data = '\n'.join(data)
f = open(filename, 'w')
f.write(data)
f.close()
filename = dirname + '/' + new['imageUrl'].split('/')[-1]
if not os.path.exists(filename):
data = ox.cache.read_url(new['imageUrl'])
                        f = open(filename, 'wb')
f.write(data)
f.close()
strings = new['url'].split('/')
string = strings[3]
if len(strings) == 6:
string += '/' + strings[4]
                    if string not in count:
count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
else:
count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
strings = split_title(new['title'])
if strings[0] != new['title1'] or strings[1] != new['title2']:
colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
    for key in sorted(count):
        print('%6d %-24s %s' % (count[key]['count'], key, count[key]['string']))
    for value in colon:
        print(value)
if __name__ == '__main__':
# spiegel = Spiegel(2008, 8)
# print spiegel.getContents()
# news = News(2001, 9, 10)
# output(news.getNews())
'''
    x = []
    for d in range(10, 30):
        print('2/%d' % d)
        news = get_news(2008, 2, d)
        for new in news:
            strings = new['url'].split('/')
            string = format_section(strings[3])
            if len(strings) == 6:
                string += '/' + format_subsection(strings[4])
            if not string in x:
                x.append(string)
    print(x)
'''
# archive_issues()
archive_news()

View file

@ -0,0 +1,117 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from datetime import datetime
import re
import socket
from urllib.parse import quote, urlencode
from urllib.error import URLError
from ox import find_re, cache, strip_tags, decode_html, get_torrent_info, normalize_newlines
from ox.normalize import normalize_imdbid
import ox
from .torrent import Torrent
cache_timeout = 24*60*60 # cache search only for 24 hours
season_episode = re.compile("S..E..", re.IGNORECASE)
def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
headers = headers.copy()
headers['Cookie'] = 'language=en_EN'
return cache.read_url(url, data, headers, timeout, unicode=unicode)
def find_movies(query=None, imdb=None, max_results=10):
if imdb:
query = "tt" + normalize_imdbid(imdb)
results = []
next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
page_count = 1
while next and page_count < 4:
page_count += 1
url = next[0]
if not url.startswith('http'):
if not url.startswith('/'):
url = "/" + url
url = "http://thepiratebay.org" + url
data = read_url(url, timeout=cache_timeout, unicode=True)
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data):
torrentType = row[0]
torrentLink = "http://thepiratebay.org" + row[1]
torrentTitle = decode_html(row[2])
# 201 = Movies , 202 = Movie DVDR, 205 TV Shows
if torrentType in ['201']:
results.append((torrentTitle, torrentLink, ''))
if len(results) >= max_results:
return results
next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
return results
def get_id(piratebayId):
if piratebayId.startswith('http://torrents.thepiratebay.org/'):
piratebayId = piratebayId.split('org/')[1]
d = find_re(piratebayId, "tor/(\d+)")
if d:
piratebayId = d
d = find_re(piratebayId, "torrent/(\d+)")
if d:
piratebayId = d
return piratebayId
def exists(piratebayId):
piratebayId = get_id(piratebayId)
return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
def get_data(piratebayId):
_key_map = {
'spoken language(s)': u'language',
'texted language(s)': u'subtitle language',
'by': u'uploader',
'leechers': 'leecher',
'seeders': 'seeder',
}
piratebayId = get_id(piratebayId)
torrent = dict()
torrent[u'id'] = piratebayId
torrent[u'domain'] = 'thepiratebay.org'
torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId
data = read_url(torrent['comment_link'], unicode=True)
torrent[u'title'] = find_re(data, '<title>(.*?) \(download torrent\) - TPB</title>')
if not torrent[u'title']:
return None
torrent[u'title'] = decode_html(torrent[u'title']).strip()
torrent[u'imdbId'] = find_re(data, 'title/tt(\d{7})')
title = quote(torrent['title'].encode('utf-8'))
torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, title)
for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
key = d[0].lower().strip()
key = _key_map.get(key, key)
value = decode_html(strip_tags(d[1].strip()))
torrent[key] = value
torrent[u'description'] = find_re(data, '<div class="nfo">(.*?)</div>')
if torrent[u'description']:
torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip()
t = read_url(torrent[u'torrent_link'])
torrent[u'torrent_info'] = get_torrent_info(t)
return torrent
class Thepiratebay(Torrent):
'''
>>> Thepiratebay('123')
{}
>>> Thepiratebay('3951349')['infohash']
'4e84415d36ed7b54066160c05a0b0f061898d12b'
'''
def __init__(self, piratebayId):
self.data = get_data(piratebayId)
if not self.data:
return
Torrent.__init__(self)
published = self.data['uploaded']
published = published.replace(' GMT', '').split(' +')[0]
self['published'] = datetime.strptime(published, "%Y-%m-%d %H:%M:%S")
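if __name__ == '__main__':
    # usage sketch: search by IMDb id (an arbitrary example) and parse the
    # first few hits into torrent dicts
    for title, link, _ in find_movies(imdb='0133093', max_results=3):
        print(title)
        print(Thepiratebay(link).get('published'))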

View file

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from ox import int_value
class Torrent(dict):
'''
>>> Torrent()
{'files': 1, 'domain': u'', 'subtitle language': u'', 'seeder': -1, 'description': u'', 'language': u'', 'title': u'', 'imdbId': u'', 'downloaded': -1, 'leecher': -1, 'torrent_link': u'', 'torrent_info': {}, 'published': u'', 'announce': '', 'infohash': '', 'id': u'', 'comment_link': u'', 'size': -1}
'''
_string_keys = ('id', 'title', 'description', 'infohash', 'torrent_link', 'comment_link',
'imdbId', 'announce', 'domain', 'published', 'language', 'subtitle language')
_int_keys = ('size', 'seeder', 'leecher', 'downloaded', 'files')
_dict_keys = ('torrent_info', )
_list_keys = ()
data = {'torrent_info': {}}
def __init__(self):
for key in self._string_keys:
self[key] = self.data.get(key, u'')
for key in self._dict_keys:
self[key] = self.data.get(key, {})
for key in self._list_keys:
self[key] = self.data.get(key, [])
for key in self._int_keys:
value = self.data.get(key, -1)
if not isinstance(value, int):
value = int(int_value(value))
self[key] = value
self['infohash'] = self.data['torrent_info'].get('hash', '')
self['size'] = self.data['torrent_info'].get('size', -1)
self['announce'] = self.data['torrent_info'].get('announce', '')
if 'files' in self.data['torrent_info']:
self['files'] = len(self.data['torrent_info']['files'])
else:
self['files'] = 1

View file

@ -0,0 +1,32 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import time
from ox import strip_tags, find_re
from ox.cache import read_url
def get_episode_data(url):
'''
    parses information on tv.com episode pages
returns dict with title, show, description, score
example:
get_episode_data('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
'''
data = read_url(url, unicode=True)
r = {}
r['description'] = strip_tags(find_re(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
r['show'] = find_re(data, '<h1>(.*?)</h1>')
r['title'] = find_re(data, '<title>.*?: (.*?) - TV.com </title>')
#episode score
r['episode score'] = find_re(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')
match = re.compile('Episode Number: (\d*?) &nbsp;&nbsp; Season Num: (\d*?) &nbsp;&nbsp; First Aired: (.*?) &nbsp').findall(data)
if match:
r['season'] = int(match[0][1])
r['episode'] = int(match[0][0])
#'Wednesday September 29, 2004' -> 2004-09-29
r['air date'] = time.strftime('%Y-%m-%d', time.strptime(match[0][2], '%A %B %d, %Y'))
return r
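if __name__ == '__main__':
    # usage sketch, reusing the episode page cited in the docstring above
    print(get_episode_data('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html'))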

View file

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from datetime import datetime
from urllib.parse import quote
import lxml.html
import ox
from ox.cache import read_url
def find(query=None, user=None, timeout=60):
if user:
url = 'https://twitter.com/' + quote(user)
else:
url = 'https://twitter.com/search/' + quote(query)
data = ox.cache.read_url(url, timeout=timeout).decode('utf-8')
doc = lxml.html.document_fromstring(data)
tweets = []
for e in doc.xpath("//div[contains(@class, 'original-tweet')]"):
t = lxml.html.tostring(e)
text = e.xpath(".//p[contains(@class, 'js-tweet-text')]")[0]
html = lxml.html.tostring(text, encoding='unicode').strip()
text = ox.decode_html(ox.strip_tags(html)).strip()
user = re.compile('data-name="(.*?)"').findall(t)[0]
user = ox.decode_html(ox.strip_tags(user)).strip()
tweets.append({
'id': re.compile('data-tweet-id="(\d+)"').findall(t)[0],
'user-id': re.compile('data-user-id="(\d+)"').findall(t)[0],
'name': re.compile('data-screen-name="(.*?)"').findall(t)[0],
'time': datetime.fromtimestamp(int(re.compile('data-time="(\d+)"').findall(t)[0])),
'user': user,
'text': text,
'html': html,
})
return tweets
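if __name__ == '__main__':
    # usage sketch: both code paths scrape twitter.com's HTML, so the fields
    # only survive as long as the markup does; the query is an example
    for tweet in find(query='python'):
        print(tweet['name'], tweet['text'])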

View file

@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
from ox import find_re, strip_tags, decode_html
from ox.cache import read_url
def get_id(url):
return url.replace('http://www.ubu.com/', '').split('.html')[0]
def get_url(id):
return 'http://www.ubu.com/%s.html' % id
def get_data(url):
if not url.startswith('http:'):
url = get_url(url)
data = read_url(url, unicode=True)
m = {
'id': get_id(url),
'url': url,
'type': re.compile('ubu.com/(.*?)/').findall(url)[0]
}
for videourl, title in re.compile('<a href="(http://ubumexico.centro.org.mx/.*?)">(.*?)</a>').findall(data):
if videourl.endswith('.srt'):
m['srt'] = videourl
elif not 'video' in m:
m['video'] = videourl
m['video'] = m['video'].replace('/video/ ', '/video/').replace(' ', '%20')
if m['video'] == 'http://ubumexico.centro.org.mx/video/':
del m['video']
m['title'] = strip_tags(decode_html(title)).strip()
if not 'url' in m:
        print(url, 'missing')
if 'title' in m:
m['title'] = re.sub('(.*?) \(\d{4}\)$', '\\1', m['title'])
match = re.compile("flashvars','file=(.*?.flv)'").findall(data)
if match:
m['flv'] = match[0]
m['flv'] = m['flv'].replace('/video/ ', '/video/').replace(' ', '%20')
y = re.compile('\((\d{4})\)').findall(data)
if y:
m['year'] = int(y[0])
d = re.compile('Director: (.+)').findall(data)
if d:
m['director'] = strip_tags(decode_html(d[0])).strip()
a = re.compile('<a href="(.*?)">Back to (.*?)</a>', re.DOTALL).findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else:
a = re.compile('<a href="(.*?)">(.*?) in UbuWeb Film').findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0][1])).strip()
else:
a = re.compile('<b>(.*?)\(b\..*?\d{4}\)').findall(data)
if a:
m['artist'] = strip_tags(decode_html(a[0])).strip()
elif m['id'] == 'film/lawder_color':
m['artist'] = 'Standish Lawder'
if 'artist' in m:
m['artist'] = m['artist'].replace('in UbuWeb Film', '')
m['artist'] = m['artist'].replace('on UbuWeb Film', '').strip()
if m['id'] == 'film/coulibeuf':
m['title'] = 'Balkan Baroque'
m['year'] = 1999
return m
def get_films():
ids = get_ids()
films = []
for id in ids:
info = get_data(id)
if info['type'] == 'film' and ('flv' in info or 'video' in info):
films.append(info)
return films
def get_ids():
data = read_url('http://www.ubu.com/film/')
ids = []
author_urls = []
for url, author in re.compile('<a href="(\./.*?)">(.*?)</a>').findall(data):
url = 'http://www.ubu.com/film' + url[1:]
data = read_url(url)
author_urls.append(url)
for u, title in re.compile('<a href="(.*?)">(.*?)</a>').findall(data):
if not u.startswith('http'):
if u == '../../sound/burroughs.html':
u = 'http://www.ubu.com/sound/burroughs.html'
elif u.startswith('../'):
u = 'http://www.ubu.com/' + u[3:]
else:
u = 'http://www.ubu.com/film/' + u
if u not in author_urls and u.endswith('.html'):
ids.append(u)
ids = [get_id(url) for url in list(set(ids))]
return ids
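if __name__ == '__main__':
    # usage sketch: enumerate the film pages, then parse a single record
    ids = get_ids()
    print(len(ids), 'films')
    if ids:
        print(get_data(ids[0]))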

View file

@ -0,0 +1,27 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
import re
import xml.etree.ElementTree as ET
from ox.cache import read_url
from ox import find_string, find_re
def get_data(id):
url = 'http://www.vimeo.com/moogaloop/load/clip:%s' %id
xml = read_url(url)
    tree = ET.ElementTree(ET.fromstring(xml))
request_signature = tree.find('request_signature').text
request_signature_expires = tree.find('request_signature_expires').text
data = {}
video_url = "http://www.vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=" % \
(id, request_signature, request_signature_expires)
data['video_sd'] = video_url + 'sd'
data['video_hd'] = video_url + 'hd'
video = tree.find('video')
for key in ('caption', 'width', 'height', 'duration', 'thumbnail'):
data[key] = video.find(key).text
return data
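if __name__ == '__main__':
    # usage sketch with an arbitrary clip id; video_sd/video_hd are the
    # signed moogaloop play URLs assembled above
    data = get_data('2910893')
    print(data['caption'], data['video_sd'])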

View file

@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import print_function
import re
from six.moves import urllib
from ox.utils import json
from ox.cache import read_url
from ox import find_re
def get_id(url):
return url.split("/")[-1]
def get_url(id=None, imdb=None, allmovie=None):
if imdb:
query = '"%s"'% imdb
result = find(query)
if result:
url = result[0][1]
data = get_movie_data(url)
if 'imdb_id' in data:
return url
return ""
if allmovie:
query = '"amg_id = 1:%s"'% allmovie
result = find(query)
if result:
url = result[0][1]
return url
return ''
return "http://en.wikipedia.org/wiki/%s" % id
def get_movie_id(title, director='', year=''):
query = '"%s" film %s %s' % (title, director, year)
result = find(query, 1)
if result:
return result[0][1]
return ''
def get_wiki_data(wikipedia_url):
url = wikipedia_url.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
url = "%s&action=raw" % url
data = read_url(url).decode('utf-8')
return data
def get_movie_data(wikipedia_url):
if not wikipedia_url.startswith('http'):
wikipedia_url = get_url(wikipedia_url)
data = get_wiki_data(wikipedia_url)
filmbox_data = find_re(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
filmbox = {}
_box = filmbox_data.strip().split('|')
for row in _box:
d = row.split('=')
if len(d) == 2:
_key = d[0].strip()
if _key:
key = _key
if key[0] == '|':
key = key[1:]
key = key.strip()
value = d[1].strip()
value = value.replace('<!-- see WP:ALT -->', '')
if '<br>' in value:
value = value.split('<br>')
if value:
if key in filmbox:
                    if isinstance(value, list) and isinstance(filmbox[key], str):
filmbox[key] = [filmbox[key]] + value
else:
filmbox[key] += value
if isinstance(filmbox[key], list):
filmbox[key] = [k for k in filmbox[key] if k]
else:
filmbox[key] = value
if not filmbox_data:
return filmbox
if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit():
del filmbox['amg_id']
if 'Allmovie movie' in data:
filmbox['amg_id'] = find_re(data, 'Allmovie movie\|.*?(\d+)')
elif 'Allmovie title' in data:
filmbox['amg_id'] = find_re(data, 'Allmovie title\|.*?(\d+)')
if 'Official website' in data:
filmbox['website'] = find_re(data, 'Official website\|(.*?)}').strip()
r = re.compile('{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
if r:
filmbox['imdb_id'] = r[0]
else:
r = re.compile('{{IMDb title\|(\d{7})', re.IGNORECASE).findall(data)
if r:
filmbox['imdb_id'] = r[0]
r = re.compile('{{Internet Archive.*?\|id=(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['archiveorg_id'] = r[0]
r = re.compile('{{mojo title\|(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['mojo_id'] = r[0].replace('id=', '')
r = re.compile('{{rotten-tomatoes\|(.*?)[\|}]', re.IGNORECASE).findall(data)
if r:
filmbox['rottentomatoes_id'] = r[0].replace('id=', '')
if 'google video' in data:
filmbox['google_video_id'] = find_re(data, 'google video\|.*?(\d*?)[\|}]')
if 'DEFAULTSORT' in data:
filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
return filmbox
def get_image_url(name):
url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20')
data = read_url(url)
url = find_re(data, 'href="(http://upload.wikimedia.org/.*?)"')
if not url:
url = find_re(data, 'href="(//upload.wikimedia.org/.*?)"')
if url:
url = 'http:' + url
return url
def get_poster_url(wikipedia_url):
if not wikipedia_url.startswith('http'): wikipedia_url = get_url(wikipedia_url)
data = get_movie_data(wikipedia_url)
if 'image' in data:
return get_image_url(data['image'])
return ''
def get_movie_poster(wikipedia_url):
# deprecated, use get_poster_url()
return get_poster_url(wikipedia_url)
def get_allmovie_id(wikipedia_url):
data = get_movie_data(wikipedia_url)
return data.get('amg_id', '')
def find(query, max_results=10):
query = {'action': 'query', 'list':'search', 'format': 'json',
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
url = "http://en.wikipedia.org/w/api.php?" + urllib.parse.urlencode(query)
data = read_url(url)
if not data:
data = read_url(url, timeout=0)
result = json.loads(data.decode('utf-8'))
results = []
if result and 'query' in result:
for r in result['query']['search']:
title = r['title']
url = "http://en.wikipedia.org/wiki/%s" % title.replace(' ', '_')
results.append((title, url, ''))
return results
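if __name__ == '__main__':
    # usage sketch: find() queries the MediaWiki search API, then the
    # filmbox parser pulls external ids out of the raw wikitext
    for title, url, _ in find('The Matrix 1999 film', 3):
        print(title, url)
        print(get_movie_data(url).get('imdb_id', ''))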

View file

@ -0,0 +1,217 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from urllib.parse import quote, unquote_plus
from urllib.request import build_opener, HTTPCookieProcessor
from http.cookiejar import CookieJar
import re
from xml.dom.minidom import parseString
import json
import feedparser
import ox
from ox.cache import read_url, cache_timeout
def get_id(url):
match = re.compile('v=(.+?)($|&)').findall(url)
if match:
return match[0][0]
def get_url(id):
return 'http://www.youtube.com/watch?v=%s' % id
def video_url(youtubeId, format='mp4', timeout=cache_timeout):
"""
youtubeId - if of video
format - video format, options: webm, 1080p, 720p, mp4, high
"""
fmt = None
if format == '4k':
fmt=38
elif format == '1080p':
fmt=37
elif format == '720p':
fmt=22
elif format == 'mp4':
fmt=18
elif format == 'high':
fmt=35
elif format == 'webm':
streams = videos(youtubeId, 'webm')
return streams[max(streams.keys())]['url']
streams = videos(youtubeId)
if str(fmt) in streams:
return streams[str(fmt)]['url']
def get_video_info(id):
eurl = get_url(id)
data = read_url(eurl)
t = re.compile('\W[\'"]?t[\'"]?: ?[\'"](.+?)[\'"]').findall(data)
if t:
t = t[0]
else:
raise IOError
url = "http://www.youtube.com/get_video_info?&video_id=%s&el=$el&ps=default&eurl=%s&hl=en_US&t=%s" % (id, quote(eurl), quote(t))
data = read_url(url)
info = {}
for part in data.split('&'):
key, value = part.split('=')
info[key] = unquote_plus(value).replace('+', ' ')
return info
def find(query, max_results=10, offset=1, orderBy='relevance'):
query = quote(query)
url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
data = read_url(url)
fd = feedparser.parse(data)
videos = []
for item in fd.entries:
id = item['id'].split('/')[-1]
title = item['title']
description = item['description']
videos.append((title, id, description))
if len(videos) >= max_results:
return videos
return videos
def info(id, timeout=cache_timeout):
info = {}
if id.startswith('http'):
id = get_id(id)
if not id:
return info
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
data = read_url(url, timeout=timeout)
xml = parseString(data)
info['id'] = id
info['url'] = get_url(id)
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
info['description'] = xml.getElementsByTagName('media:description')[0].firstChild.data
info['date'] = xml.getElementsByTagName('published')[0].firstChild.data.split('T')[0]
info['author'] = "http://www.youtube.com/user/%s"%xml.getElementsByTagName('name')[0].firstChild.data
info['categories'] = []
for cat in xml.getElementsByTagName('media:category'):
info['categories'].append(cat.firstChild.data)
k = xml.getElementsByTagName('media:keywords')[0].firstChild
if k:
info['keywords'] = k.data.split(', ')
data = read_url(info['url'], timeout=timeout)
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
if match:
info['license'] = match[0].strip()
info['license'] = re.sub('<.+?>', '', info['license']).strip()
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1" % id
data = read_url(url, timeout=timeout)
xml = parseString(data)
languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')]
if languages:
info['subtitles'] = {}
for language in languages:
url = "http://www.youtube.com/api/timedtext?hl=en&v=%s&type=track&lang=%s&name&kind"%(id, language)
data = read_url(url, timeout=timeout)
xml = parseString(data)
subs = []
for t in xml.getElementsByTagName('text'):
start = float(t.getAttribute('start'))
duration = t.getAttribute('dur')
if not duration:
duration = '2'
end = start + float(duration)
if t.firstChild:
text = t.firstChild.data
subs.append({
'in': start,
'out': end,
'value': ox.decode_html(text),
})
info['subtitles'][language] = subs
return info
def videos(id, format=''):
stream_type = {
'flv': 'video/x-flv',
'webm': 'video/webm',
'mp4': 'video/mp4'
}.get(format)
info = get_video_info(id)
stream_map = info['url_encoded_fmt_stream_map']
streams = {}
for x in stream_map.split(','):
stream = {}
#for s in x.split('\\u0026'):
for s in x.split('&'):
key, value = s.split('=')
value = unquote_plus(value)
stream[key] = value
if 'url' in stream and 'sig' in stream:
stream['url'] = '%s&signature=%s' % (stream['url'], stream['sig'])
if not stream_type or stream['type'].startswith(stream_type):
streams[stream['itag']] = stream
return streams
def playlist(url):
data = read_url(url)
items = []
for i in list(set(re.compile('<a href="(/watch\?v=.*?)" title="(.*?)" ').findall(data))):
items.append({
'title': i[1],
'url': 'http://www.youtube.com' + i[0].split('&amp;')[0]
})
return items
def download_webm(id, filename):
stream_type = 'video/webm'
url = "http://www.youtube.com/watch?v=%s" % id
    cj = CookieJar()
    opener = build_opener(HTTPCookieProcessor(cj))
opener.addheaders = [
('User-Agent',
'Mozilla/5.0 (X11; Linux i686; rv:2.0) Gecko/20100101 Firefox/4.0'),
('Accept-Language', 'en-us, en;q=0.50')
]
u = opener.open(url)
    data = u.read().decode('utf-8')
u.close()
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
streams = {}
for x in match[0].split(','):
stream = {}
for s in x.split('\\u0026'):
key, value = s.split('=')
value = unquote_plus(value)
stream[key] = value
if stream['type'].startswith(stream_type):
streams[stream['itag']] = stream
if streams:
s = max(streams.keys())
url = streams[s]['url']
if 'sig' in streams[s]:
            url += '&signature=' + streams[s]['sig']
else:
return None
#download video and save to file.
u = opener.open(url)
    f = open(filename, 'wb')
data = True
while data:
data = u.read(4096)
f.write(data)
f.close()
u.close()
return filename
def get_config(id):
if id.startswith('http'):
url = id
else:
url = get_url(id)
data = read_url(url)
match = re.compile('ytplayer.config = (.*?);<').findall(data)
if match:
        config = json.loads(match[0])
return config
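if __name__ == '__main__':
    # usage sketch with an arbitrary video id; the signed stream URLs from
    # videos()/video_url() expire, and the gdata endpoints used by info()
    # may no longer respond as this module expects
    vid = 'dQw4w9WgXcQ'
    print(info(vid)['title'])
    print(video_url(vid, 'mp4'))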