ox.web under_score api rewrite

j 2012-08-15 17:15:40 +02:00
parent bb35daa95c
commit a4fd3c930f
29 changed files with 268 additions and 285 deletions
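The rename follows a mostly mechanical rule: every public camelCase name becomes snake_case (getData → get_data, parseXmlDict → parse_xml_dict), with a few names shortened along the way (getMovieIdByTitle → get_movie_by_title) and most *ByImdb/*ById variants folded into keyword arguments. A minimal sketch of the base conversion rule, using only the standard library; the helper name is illustrative and not part of this commit:

    import re

    def underscore(name):
        # insert '_' between a lower-case letter or digit and an upper-case letter, then lower-case
        return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', name).lower()

    assert underscore('getData') == 'get_data'
    assert underscore('parseXmlDict') == 'parse_xml_dict'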

View file

@@ -307,6 +307,8 @@ def parse_movie_path(path):
     title = title.replace('_ ', ': ')
     if title.endswith('_'):
         title = title[:-1] + '.'
+    if title.startswith('_'):
+        title = '.' + title[1:]
     year = find_re(title, '(\(\d{4}\))')
     if not year:
@@ -344,8 +346,9 @@ def parse_movie_path(path):
     else:
         season = None
-    episode = find_re(parts[-1], '\.Episode (\d+)\.')
+    episode = find_re(parts[-1], '\.Episode[s]* ([\d+]+)\.')
     if episode:
+        episode = episode.split('+')[0]
         episode = int(episode)
     else:
         episode = None
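With the widened regex and the added split, multi-episode filenames now resolve to their first episode number. A quick illustration of the new matching (hypothetical filename, plain re in place of ox's find_re):

    import re

    part = 'Title.Episodes 03+04.avi'
    episode = re.search(r'\.Episode[s]* ([\d+]+)\.', part).group(1)  # '03+04'
    episode = int(episode.split('+')[0])  # -> 3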

View file

@@ -7,7 +7,7 @@ from utils import json, ET
 def get_embed_code(url, maxwidth=None, maxheight=None):
     embed = {}
-    header = cache.getHeaders(url)
+    header = cache.get_headers(url)
     if header.get('content-type', '').startswith('text/html'):
         html = cache.readUrl(url)
         json_oembed = filter(lambda l: 'json+oembed' in l, re.compile('<link.*?>').findall(html))

View file

@@ -7,68 +7,68 @@ from ox import strip_tags, find_re
 from ox.cache import read_url
 
-def getId(url):
+def get_id(url):
     return url.split("/")[-1]
 
-def getData(id):
+def get_data(id):
     '''
-    >>> getData('129689')['cast'][1][1]
+    >>> get_data('129689')['cast'][1][1]
     u'Marianne'
-    >>> getData('129689')['credits'][0][0]
+    >>> get_data('129689')['credits'][0][0]
     u'Jean-Luc Godard'
-    >>> getData('129689')['posters'][0]
+    >>> get_data('129689')['posters'][0]
     u'http://image.allmusic.com/00/adg/cov200/dru800/u812/u81260bbffr.jpg'
-    >>> getData('129689')['rating']
+    >>> get_data('129689')['rating']
     u'4.5'
     '''
     if id.startswith('http'):
-        id = getId(id)
+        id = get_id(id)
     data = {
-        "url": getUrl(id)
+        "url": get_url(id)
     }
     html = read_url(data["url"], unicode=True)
-    data['aka'] = parseList(html, 'AKA')
+    data['aka'] = parse_list(html, 'AKA')
     data['category'] = find_re(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
-    data['countries'] = parseList(html, 'countries')
-    data['director'] = parseEntry(html, 'directed by')
-    data['genres'] = parseList(html, 'genres')
-    data['keywords'] = parseList(html, 'keywords')
+    data['countries'] = parse_list(html, 'countries')
+    data['director'] = parse_entry(html, 'directed by')
+    data['genres'] = parse_list(html, 'genres')
+    data['keywords'] = parse_list(html, 'keywords')
     data['posters'] = [find_re(html, '<img src="(http://cps-.*?)"')]
-    data['produced'] = parseList(html, 'produced by')
+    data['produced'] = parse_list(html, 'produced by')
     data['rating'] = find_re(html, 'Stars" title="(.*?) Stars"')
-    data['released'] = parseEntry(html, 'released by')
-    data['releasedate'] = parseList(html, 'release date')
-    data['runtime'] = parseEntry(html, 'run time').replace('min.', '').strip()
-    data['set'] = parseEntry(html, 'set in')
+    data['released'] = parse_entry(html, 'released by')
+    data['releasedate'] = parse_list(html, 'release date')
+    data['runtime'] = parse_entry(html, 'run time').replace('min.', '').strip()
+    data['set'] = parse_entry(html, 'set in')
     data['synopsis'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
-    data['themes'] = parseList(html, 'themes')
-    data['types'] = parseList(html, 'types')
+    data['themes'] = parse_list(html, 'themes')
+    data['types'] = parse_list(html, 'types')
     data['year'] = find_re(html, '<span class="year">.*?(\d+)')
     #data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
     data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
     #html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
-    #data['cast'] = parseTable(html)
+    #data['cast'] = parse_table(html)
     #html = read_url("http://allmovie.com/work/%s/credits" % id, unicode=True)
-    #data['credits'] = parseTable(html)
+    #data['credits'] = parse_table(html)
     html = read_url("http://allmovie.com/work/%s/review" % id, unicode=True)
     data['review'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
     return data
 
-def getUrl(id):
+def get_url(id):
     return "http://allmovie.com/work/%s" % id
 
-def parseEntry(html, title):
+def parse_entry(html, title):
     html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
     return strip_tags(html).strip()
 
-def parseList(html, title):
+def parse_list(html, title):
     html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
     r = map(lambda x: strip_tags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html))
     if not r and html:
         r = [strip_tags(html)]
     return r
 
-def parseTable(html):
+def parse_table(html):
     return map(
         lambda x: map(
             lambda x: strip_tags(x).strip().replace('&nbsp;', ''),
@@ -77,10 +77,10 @@ def parseTable(html):
         find_re(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
     )
 
-def parseText(html, title):
+def parse_text(html, title):
     return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
 
 if __name__ == '__main__':
-    print getData('129689')
-    # print getData('177524')
+    print get_data('129689')
+    # print get_data('177524')

View file

@@ -13,17 +13,17 @@ def findISBN(title, author):
     data = read_url(url, unicode=True)
     links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
     id = find_re(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/')
-    data = getData(id)
+    data = get_data(id)
     if author in data['authors']:
         return data
     return {}
 
-def getData(id):
+def get_data(id):
     url = "http://www.amazon.com/title/dp/%s/" % id
     data = read_url(url, unicode=True)
 
-    def findData(key):
+    def find_data(key):
         return find_re(data, '<li><b>%s:</b>(.*?)</li>'% key).strip()
 
     r = {}
@@ -34,15 +34,15 @@ def getData(id):
     t = re.compile('>(.*?)</a> \(Translator\)').findall(data)
     if t:
         r['translator'] = t
-    r['publisher'] = findData('Publisher')
-    r['language'] = findData('Language')
-    r['isbn-10'] = findData('ISBN-10')
-    r['isbn-13'] = findData('ISBN-13').replace('-', '')
+    r['publisher'] = find_data('Publisher')
+    r['language'] = find_data('Language')
+    r['isbn-10'] = find_data('ISBN-10')
+    r['isbn-13'] = find_data('ISBN-13').replace('-', '')
     r['dimensions'] = find_re(data, '<li><b>.*?Product Dimensions:.*?</b>(.*?)</li>')
-    r['pages'] = findData('Paperback')
+    r['pages'] = find_data('Paperback')
     if not r['pages']:
-        r['pages'] = findData('Hardcover')
+        r['pages'] = find_data('Hardcover')
     r['review'] = strip_tags(find_re(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()

View file

@@ -14,7 +14,7 @@ HEADERS = {
 USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7) '
 USER_AGENT += 'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3'
 
-def getMovieData(title, director):
+def get_movie_data(title, director):
     if isinstance(title, unicode):
         title = title.encode('utf-8')
     if isinstance(director, unicode):
@@ -60,8 +60,8 @@ def getMovieData(title, director):
     return data
 
 if __name__ == '__main__':
-    print getMovieData('Alphaville', 'Jean-Luc Godard')
-    print getMovieData('Sin City', 'Roberto Rodriguez')
-    print getMovieData('Breathless', 'Jean-Luc Godard')
-    print getMovieData('Capitalism: A Love Story', 'Michael Moore')
-    print getMovieData('Film Socialisme', 'Jean-Luc Godard')
+    print get_movie_data('Alphaville', 'Jean-Luc Godard')
+    print get_movie_data('Sin City', 'Roberto Rodriguez')
+    print get_movie_data('Breathless', 'Jean-Luc Godard')
+    print get_movie_data('Capitalism: A Love Story', 'Michael Moore')
+    print get_movie_data('Film Socialisme', 'Jean-Luc Godard')

View file

@@ -3,15 +3,15 @@
 from .. import cache
 from ..utils import json
 
-def getId(url):
+def get_id(url):
     return url.split("/")[-1]
 
-def getUrl(id):
+def get_url(id):
     return "http://www.archive.org/details/%s" % id
 
-def getData(id):
+def get_data(id):
     data = {}
-    url = getUrl(id)
+    url = get_url(id)
     details = cache.read_url('%s?output=json' % url)
     details = json.loads(details)
     for key in ('title', 'description', 'runtime'):

View file

@@ -9,25 +9,25 @@ from ox.text import find_re, remove_special_characters
 import imdb
 
-def getId(url):
+def get_id(url):
     return url.split("/")[-1]
 
-def getUrl(id):
+def get_url(id):
     return "http://www.criterion.com/films/%s" % id
 
-def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
+def get_data(id, timeout=ox.cache.cache_timeout, get_imdb=False):
     '''
-    >>> getData('1333')['imdbId']
+    >>> get_data('1333')['imdbId']
     u'0060304'
-    >>> getData('236')['posters'][0]
+    >>> get_data('236')['posters'][0]
     u'http://criterion_production.s3.amazonaws.com/release_images/1586/ThirdManReplace.jpg'
-    >>> getData('786')['posters'][0]
+    >>> get_data('786')['posters'][0]
     u'http://criterion_production.s3.amazonaws.com/product_images/185/343_box_348x490.jpg'
     '''
     data = {
-        "url": getUrl(id)
+        "url": get_url(id)
     }
     try:
         html = read_url(data["url"], timeout=timeout, unicode=True)
@@ -71,21 +71,21 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
     if timeout == ox.cache.cache_timeout:
         timeout = -1
     if get_imdb:
-        data['imdbId'] = imdb.getMovieId(data['title'],
+        data['imdbId'] = imdb.get_movie_id(data['title'],
             data['director'], data['year'], timeout=timeout)
     return data
 
-def getIds():
+def get_ids():
     ids = []
     html = read_url("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine", unicode=True)
     results = re.compile("\&amp;p=(\d+)\&").findall(html)
     pages = max(map(int, results))
     for page in range(1, pages):
-        for id in getIdsByPage(page):
+        for id in get_idsByPage(page):
             ids.append(id)
     return map(lambda id: str(id), sorted(map(lambda id: int(id), set(ids))))
 
-def getIdsByPage(page):
+def get_idsByPage(page):
     ids = []
     url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
     html = read_url(url, unicode=True)
@@ -101,4 +101,4 @@ def getIdsByPage(page):
     return set(ids)
 
 if __name__ == '__main__':
-    print getIds()
+    print get_ids()

View file

@@ -5,7 +5,7 @@ from urllib import unquote
 from ox.cache import read_url
 
-def getVideoUrl(url):
+def get_video_url(url):
     '''
     >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3opar_priere-pour-refuznik-1-jeanluc-goda_shortfilms').split('?auth')[0]
     'http://www.dailymotion.com/cdn/FLV-320x240/video/x3opar_priere-pour-refuznik-1-jean-luc-god_shortfilms.flv'

View file

@@ -9,7 +9,7 @@ from ox.cache import read_url
 import google
 
-def getShowUrl(title):
+def get_show_url(title):
     '''
     Search Epguide Url for Show via Show Title.
     Use Google to search the url, this is also done on Epguide.
@@ -20,7 +20,7 @@ def getShowUrl(title):
         return url
     return None
 
-def getShowData(url):
+def get_show_data(url):
     data = read_url(url, unicode=True)
     r = {}
     r['title'] = strip_tags(find_re(data, '<h1>(.*?)</h1>'))

View file

@@ -9,28 +9,28 @@ from ox import find_re, strip_tags
 from ox.web.imdb import ImdbCombined
 
-def getData(id, timeout=-1):
+def get_data(id, timeout=-1):
     '''
-    >>> getData('the-matrix')['poster']
+    >>> get_data('the-matrix')['poster']
     'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
-    >>> getData('0133093')['poster']
+    >>> get_data('0133093')['poster']
     'http://content7.flixster.com/movie/16/90/52/1690525_gal.jpg'
-    >>> getData('2-or-3-things-i-know-about-her')['poster']
+    >>> get_data('2-or-3-things-i-know-about-her')['poster']
     'http://content6.flixster.com/movie/10/95/43/10954392_gal.jpg'
-    >>> getData('0078875')['rottentomatoes_id']
+    >>> get_data('0078875')['rottentomatoes_id']
     'http://www.rottentomatoes.com/m/the-tin-drum/'
     '''
     if len(id) == 7:
         try:
             int(id)
-            id = getIdByImdb(id)
+            id = get_id(imdb=id)
         except:
             pass
     data = {
-        "url": getUrl(id),
+        "url": get_url(id),
     }
     html = read_url(data['url'], timeout=timeout, unicode=True)
     doc = document_fromstring(html)
@@ -55,21 +55,20 @@ def getData(id, timeout=-1):
         return None
     return data
 
-def getIdByImdb(imdbId):
+def get_id(url=None, imdb=None):
     '''
-    >>> getIdByImdb('0133093')
+    >>> get_id(imdb='0133093')
     u'the-matrix'
 
-    #>>> getIdByImdb('0060304')
+    #>>> get_id(imdb='0060304')
     #u'2-or-3-things-i-know-about-her'
     '''
-    i = ImdbCombined(imdbId)
-    title = i['title']
-    return title.replace(' ', '-').lower().replace("'", '')
-
-def getId(url):
+    if imdb:
+        i = ImdbCombined(imdb)
+        title = i['title']
+        return title.replace(' ', '-').lower().replace("'", '')
     return url.split('/')[-1]
 
-def getUrl(id):
+def get_url(id):
     return "http://www.flixster.com/movie/%s"%id

View file

@@ -5,7 +5,7 @@ import json
 from ox.cache import read_url
 from ox import find_re
 
-class Imdb(dict):
+class Freebase(dict):
     def __init__(self, id, timeout=-1):
         url = "http://ids.freebaseapps.com/get_ids?id=/authority/imdb/title/tt%s" % id
         '''

View file

@@ -20,7 +20,7 @@ def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.
     headers = headers.copy()
     return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
 
-def getUrl(id):
+def get_url(id):
     return "http://www.imdb.com/title/tt%s/" % id
 
 class Imdb(SiteParser):
@@ -420,7 +420,7 @@ class ImdbCombined(Imdb):
         self.regex = _regex
         super(ImdbCombined, self).__init__(id, timeout)
 
-def getMovieIdByTitle(title, timeout=-1):
+def get_movie_by_title(title, timeout=-1):
     '''
     This only works for exact title matches from the data dump
     Usually in the format
@@ -431,22 +431,22 @@ def getMovieIdByTitle(title, timeout=-1):
     If there is more than one film with that title for the year
     Title (Year/I)
 
-    >>> getMovieIdByTitle(u'"Father Knows Best" (1954) {(#5.34)}')
+    >>> get_movie_by_title(u'"Father Knows Best" (1954) {(#5.34)}')
     u'1602860'
 
-    >>> getMovieIdByTitle(u'The Matrix (1999)')
+    >>> get_movie_by_title(u'The Matrix (1999)')
     u'0133093'
 
-    >>> getMovieIdByTitle(u'Little Egypt (1951)')
+    >>> get_movie_by_title(u'Little Egypt (1951)')
     u'0043748'
 
-    >>> getMovieIdByTitle(u'Little Egypt (1897/I)')
+    >>> get_movie_by_title(u'Little Egypt (1897/I)')
     u'0214882'
 
-    >>> getMovieIdByTitle(u'Little Egypt')
+    >>> get_movie_by_title(u'Little Egypt')
     None
 
-    >>> getMovieIdByTitle(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
+    >>> get_movie_by_title(u'"Dexter" (2006) {Father Knows Best (#1.9)}')
     u'0866567'
     '''
     params = {'s':'tt','q': title}
@@ -465,21 +465,21 @@ def getMovieIdByTitle(title, timeout=-1):
         return results[0]
     return None
 
-def getMovieId(title, director='', year='', timeout=-1):
+def get_movie_id(title, director='', year='', timeout=-1):
     '''
-    >>> getMovieId('The Matrix')
+    >>> get_movie_id('The Matrix')
     u'0133093'
 
-    >>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
+    >>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard')
     u'0060304'
 
-    >>> getMovieId('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
+    >>> get_movie_id('2 or 3 Things I Know About Her', 'Jean-Luc Godard', '1967')
     u'0060304'
 
-    >>> getMovieId(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
+    >>> get_movie_id(u"Histoire(s) du cinema: Le controle de l'univers", 'Jean-Luc Godard')
     u'0179214'
 
-    >>> getMovieId(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
+    >>> get_movie_id(u"Histoire(s) du cinéma: Le contrôle de l'univers", 'Jean-Luc Godard')
     u'0179214'
     '''
     imdbId = {
@@ -555,12 +555,12 @@ def getMovieId(title, director='', year='', timeout=-1):
     #or nothing
     return ''
 
-def getMoviePoster(imdbId):
+def get_movie_poster(imdbId):
     '''
-    >>> getMoviePoster('0133093')
+    >>> get_movie_poster('0133093')
     'http://ia.media-imdb.com/images/M/MV5BMjEzNjg1NTg2NV5BMl5BanBnXkFtZTYwNjY3MzQ5._V1._SX338_SY475_.jpg'
 
-    >>> getMoviePoster('0994352')
+    >>> get_movie_poster('0994352')
     'http://ia.media-imdb.com/images/M/MV5BMjA3NzMyMzU1MV5BMl5BanBnXkFtZTcwNjc1ODUwMg@@._V1._SX594_SY755_.jpg'
     '''
     info = ImdbCombined(imdbId)
@@ -570,10 +570,10 @@ def getMoviePoster(imdbId):
         poster = find_re(data, 'img id="primary-img".*?src="(.*?)"')
         return poster
     elif 'series' in info:
-        return getMoviePoster(info['series'])
+        return get_movie_poster(info['series'])
     return ''
 
-def maxVotes():
+def max_votes():
     url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
     data = ox.cache.read_url(url)
     votes = max([int(v.replace(',', ''))
@@ -581,7 +581,7 @@ def maxVotes():
     return votes
 
 def guess(title, director='', timeout=-1):
-    return getMovieId(title, director, timeout=timeout)
+    return get_movie_id(title, director, timeout=timeout)
 
 if __name__ == "__main__":
     import json

View file

@@ -7,19 +7,19 @@ from ox.html import strip_tags
 from ox.text import find_re
 
-def getData(id):
+def get_data(id):
     '''
-    >>> getData('1991/silence_of_the_lambs')['imdbId']
+    >>> get_data('1991/silence_of_the_lambs')['imdbId']
     u'0102926'
 
-    >>> getData('1991/silence_of_the_lambs')['posters'][0]
+    >>> get_data('1991/silence_of_the_lambs')['posters'][0]
     u'http://www.impawards.com/1991/posters/silence_of_the_lambs_ver1.jpg'
 
-    >>> getData('1991/silence_of_the_lambs')['url']
+    >>> get_data('1991/silence_of_the_lambs')['url']
     u'http://www.impawards.com/1991/silence_of_the_lambs_ver1.html'
     '''
     data = {
-        'url': getUrl(id)
+        'url': get_url(id)
     }
     html = read_url(data['url'], unicode=True)
     data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
@@ -48,7 +48,7 @@ def getData(id):
     return data
 
-def getId(url):
+def get_id(url):
     split = url.split('/')
     year = split[3]
     split = split[4][:-5].split('_')
@@ -59,26 +59,25 @@ def getId(url):
     id = '%s/%s' % (year, '_'.join(split))
     return id
 
-def getIds():
+def get_ids(page=None):
     ids = []
+    if page:
+        html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout = -1, unicode=True)
+        results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
+        for result in results:
+            url = 'http://impawards.com/%s' % result
+            ids.append(get_id(url))
+        return set(ids)
+    #get all
     html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
     pages = int(find_re(html, '<a href= page(.*?).html>')) + 1
     for page in range(pages, 0, -1):
-        for id in getIdsByPage(page):
+        for id in get_ids(page):
             if not id in ids:
                 ids.append(id)
     return ids
 
-def getIdsByPage(page):
-    ids = []
-    html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout = -1, unicode=True)
-    results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
-    for result in results:
-        url = 'http://impawards.com/%s' % result
-        ids.append(getId(url))
-    return set(ids)
-
-def getUrl(id):
+def get_url(id):
     url = u"http://www.impawards.com/%s.html" % id
     html = read_url(url, unicode=True)
     if find_re(html, "No Movie Posters on This Page"):
@@ -297,5 +296,5 @@ _id_map = {
 }
 
 if __name__ == '__main__':
-    ids = getIds()
+    ids = get_ids()
     print sorted(ids), len(ids)

View file

@@ -24,7 +24,7 @@ ITUNES_HEADERS = {
     'Connection': 'close',
 }
 
-def composeUrl(request, parameters):
+def compose_url(request, parameters):
     if request == 'advancedSearch':
         url = 'http://ax.phobos.apple.com.edgesuite.net/WebObjects/MZSearch.woa/wa/advancedSearch?'
         if parameters['media'] == 'music':
@@ -60,7 +60,7 @@ def composeUrl(request, parameters):
         url = 'http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewMovie?id=%s&prvw=1' % parameters['id']
     return url
 
-def parseXmlDict(xml):
+def parse_xml_dict(xml):
     values = {}
     strings = xml.split('<key>')
     for string in strings:
@@ -78,7 +78,7 @@ def parseXmlDict(xml):
             values[key] = value
     return values
 
-def parseCast(xml, title):
+def parse_cast(xml, title):
     list = []
     try:
         strings = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
@@ -89,7 +89,7 @@ def parseCast(xml, title):
     except:
         return list
 
-def parseMovies(xml, title):
+def parse_movies(xml, title):
     list = []
     try:
         strings = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
@@ -109,17 +109,17 @@ class ItunesAlbum:
         self.title = title
         self.artist = artist
         if not id:
-            self.id = self.getId()
+            self.id = self.get_id()
 
-    def getId(self):
-        url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
+    def get_id(self):
+        url = compose_url('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
         xml = read_url(url, headers = ITUNES_HEADERS)
         id = find_re(xml, 'viewAlbum\?id=(.*?)&')
         return id
 
-    def getData(self):
+    def get_data(self):
         data = {'id': self.id}
-        url = composeUrl('viewAlbum', {'id': self.id})
+        url = compose_url('viewAlbum', {'id': self.id})
         xml = read_url(url, None, ITUNES_HEADERS)
         data['albumName'] = find_re(xml, '<B>(.*?)</B>')
         data['artistName'] = find_re(xml, '<b>(.*?)</b>')
@@ -130,7 +130,7 @@ class ItunesAlbum:
         data['tracks'] = []
         strings = find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
         for string in strings:
-            data['tracks'].append(parseXmlDict(string))
+            data['tracks'].append(parse_xml_dict(string))
         data['type'] = find_re(xml, '<key>listType</key><string>(.*?)<')
         return data
 
@@ -140,48 +140,48 @@ class ItunesMovie:
         self.title = title
         self.director = director
         if not id:
-            self.id = self.getId()
+            self.id = self.get_id()
 
-    def getId(self):
-        url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
+    def get_id(self):
+        url = compose_url('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
         xml = read_url(url, headers = ITUNES_HEADERS)
         id = find_re(xml, 'viewMovie\?id=(.*?)&')
         return id
 
-    def getData(self):
+    def get_data(self):
         data = {'id': self.id}
-        url = composeUrl('viewMovie', {'id': self.id})
+        url = compose_url('viewMovie', {'id': self.id})
         xml = read_url(url, None, ITUNES_HEADERS)
         f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')
         f.write(xml)
         f.close()
-        data['actors'] = parseCast(xml, 'actors')
+        data['actors'] = parse_cast(xml, 'actors')
         string = find_re(xml, 'Average Rating:(.*?)</HBoxView>')
         data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
-        data['directors'] = parseCast(xml, 'directors')
+        data['directors'] = parse_cast(xml, 'directors')
         data['format'] = find_re(xml, 'Format:(.*?)<')
         data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))
         data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
         data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
-        data['producers'] = parseCast(xml, 'producers')
+        data['producers'] = parse_cast(xml, 'producers')
         data['rated'] = find_re(xml, 'Rated(.*?)<')
-        data['relatedMovies'] = parseMovies(xml, 'related movies')
+        data['relatedMovies'] = parse_movies(xml, 'related movies')
         data['releaseDate'] = find_re(xml, 'Released(.*?)<')
         data['runTime'] = find_re(xml, 'Run Time:(.*?)<')
-        data['screenwriters'] = parseCast(xml, 'screenwriters')
+        data['screenwriters'] = parse_cast(xml, 'screenwriters')
         data['soundtrackId'] = find_re(xml, 'viewAlbum\?id=(.*?)&')
         data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')
         return data
 
 if __name__ == '__main__':
     from ox.utils import json
-    data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').getData()
+    data = ItunesAlbum(title = 'So Red the Rose', artist = 'Arcadia').get_data()
     print json.dumps(data, sort_keys = True, indent = 4)
-    data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').getData()
+    data = ItunesMovie(title = 'The Matrix', director = 'Wachowski').get_data()
     print json.dumps(data, sort_keys = True, indent = 4)
     for v in data['relatedMovies']:
-        data = ItunesMovie(id = v['id']).getData()
+        data = ItunesMovie(id = v['id']).get_data()
         print json.dumps(data, sort_keys = True, indent = 4)
-    data = ItunesMovie(id='272960052').getData()
+    data = ItunesMovie(id='272960052').get_data()
     print json.dumps(data, sort_keys = True, indent = 4)

View file

@@ -5,7 +5,7 @@ from ox.html import decode_html
 from ox.text import find_re
 
-def getLyrics(title, artist):
+def get_lyrics(title, artist):
     html = read_url('http://lyricsfly.com/api/')
     key = find_re(html, '<font color=green><b>(.*?)</b></font>')
     url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)

View file

@@ -7,25 +7,24 @@ from lxml.html import document_fromstring
 from ox.cache import read_url
 from ox import find_re, strip_tags
 
-def getUrl(id):
+def get_url(id=None, imdb=None):
+    if imdb:
+        url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
+        data = read_url(url)
+        metacritic_url = find_re(data, '"(http://www.metacritic.com/movie/.*?)"')
+        return metacritic_url or None
     return 'http://www.metacritic.com/movie/%s' % id
 
-def getId(url):
+def get_id(url):
     return url.split('/')[-1]
 
-def getUrlByImdb(imdb):
-    url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
-    data = read_url(url)
-    metacritic_url = find_re(data, '"(http://www.metacritic.com/movie/.*?)"')
-    return metacritic_url or None
-
-def getMetacriticShowUrl(title):
+def get_show_url(title):
     title = quote(title)
     url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
     data = read_url(url)
     return find_re(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
 
-def getData(url):
+def get_data(url):
     data = read_url(url, unicode=True)
     doc = document_fromstring(data)
     score = filter(lambda s: s.attrib.get('property') == 'v:average',
@@ -57,7 +56,7 @@ def getData(url):
     return {
         'critics': metacritics,
-        'id': getId(url),
+        'id': get_id(url),
         'score': score,
         'url': url,
     }
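Here getUrl and getUrlByImdb collapse the same way; the imdb branch still returns None when the criticreviews page yields no metacritic link. A hedged usage sketch (module path assumed as ox.web.metacritic):

    from ox.web import metacritic

    metacritic.get_url('the-matrix')     # was: getUrl(id)
    metacritic.get_url(imdb='0133093')   # was: getUrlByImdb('0133093'); may return None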

View file

@@ -13,7 +13,7 @@ import ox
 from torrent import Torrent
 
-def _parseResultsPage(data, max_results=10):
+def _parse_results_page(data, max_results=10):
     results=[]
     regexp = '''<tr><td>(.*?)</td><td>(.*?)<a href="/tor/(.*?)">(.*?)</a>.*?</td>.*?</tr>'''
     for row in re.compile(regexp, re.DOTALL).findall(data):
@@ -27,22 +27,17 @@ def _parseResultsPage(data, max_results=10):
             results.append((torrentTitle, torrentLink, ''))
     return results
 
-def findMovie(query, max_results=10):
+def find_movie(query=None, imdb=None, max_results=10):
     '''search for torrents on mininova
     '''
-    url = "http://www.mininova.org/search/%s/seeds" % quote(query)
+    if imdb:
+        url = "http://www.mininova.org/imdb/?imdb=%s" % normalize_imdbid(imdb)
+    else:
+        url = "http://www.mininova.org/search/%s/seeds" % quote(query)
     data = read_url(url, unicode=True)
-    return _parseResultsPage(data, max_results)
-
-def findMovieByImdb(imdbId):
-    '''find torrents on mininova for a given imdb id
-    '''
-    results = []
-    imdbId = normalize_imdbid(imdbId)
-    data = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdbId, unicode=True)
-    return _parseResultsPage(data)
-
-def getId(mininovaId):
+    return _parse_results_page(data, max_results)
+
+def get_id(mininovaId):
     mininovaId = unicode(mininovaId)
     d = find_re(mininovaId, "/(\d+)")
     if d:
@@ -54,7 +49,7 @@ def getId(mininovaId):
         return mininovaId[-1]
 
 def exists(mininovaId):
-    mininovaId = getId(mininovaId)
+    mininovaId = get_id(mininovaId)
     data = ox.net.read_url("http://www.mininova.org/tor/%s" % mininovaId)
     if not data or 'Torrent not found...' in data:
         return False
@@ -62,11 +57,11 @@ def exists(mininovaId):
         return False
     return True
 
-def getData(mininovaId):
+def get_data(mininovaId):
     _key_map = {
         'by': u'uploader',
     }
-    mininovaId = getId(mininovaId)
+    mininovaId = get_id(mininovaId)
     torrent = dict()
     torrent[u'id'] = mininovaId
     torrent[u'domain'] = 'mininova.org'
@@ -101,7 +96,7 @@ class Mininova(Torrent):
         '72dfa59d2338e4a48c78cec9de25964cddb64104'
     '''
     def __init__(self, mininovaId):
-        self.data = getData(mininovaId)
+        self.data = get_data(mininovaId)
        if not self.data:
            return
        Torrent.__init__(self)
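findMovie and findMovieByImdb likewise become one find_movie; when imdb is given, the query argument is ignored. A sketch of the migrated calls (module path assumed, results depend on the live site):

    from ox.web import mininova

    torrents = mininova.find_movie(query='Alphaville')  # was: findMovie('Alphaville')
    torrents = mininova.find_movie(imdb='0058898')      # was: findMovieByImdb('0058898')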

View file

@@ -6,39 +6,39 @@ import re
 from ox.cache import read_url
 from ox import find_re
 
-def getData(id):
+def get_data(id):
     '''
-    >>> getData('0060304')['posters'][0]
+    >>> get_data('0060304')['posters'][0]
     u'http://www.movieposterdb.com/posters/06_03/1967/0060304/l_99688_0060304_639fdd1e.jpg'
-    >>> getData('0123456')['posters']
+    >>> get_data('0123456')['posters']
     []
     '''
     data = {
-        "url": getUrl(id)
+        "url": get_url(id)
     }
-    data["posters"] = getPostersByUrl(data["url"])
+    data["posters"] = get_posters(data["url"])
     return data
 
-def getId(url):
+def get_id(url):
     return url.split("/")[-2]
 
-def getPostersByUrl(url, group=True, timeout=-1):
+def get_posters(url, group=True, timeout=-1):
     posters = []
     html = read_url(url, timeout=timeout, unicode=True)
     if url in html:
         if group:
             results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
             for result in results:
-                posters += getPostersByUrl(result, False)
+                posters += get_posters(result, False)
         results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
         for result in results:
             html = read_url(result, timeout=timeout, unicode=True)
             posters.append(find_re(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
     return posters
 
-def getUrl(id):
+def get_url(id):
     return "http://www.movieposterdb.com/movie/%s/" % id
 
 if __name__ == '__main__':
-    print getData('0060304')
-    print getData('0133093')
+    print get_data('0060304')
+    print get_data('0133093')

View file

@@ -7,7 +7,7 @@ from ox.cache import read_url
 from ox import find_re, strip_tags
 from ox import langCode2To3, langTo3Code
 
-def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
+def find_subtitles(imdb, parts = 1, language = "eng"):
     if len(language) == 2:
         language = langCode2To3(language)
     elif len(language) != 3:
@@ -29,7 +29,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
         opensubtitleId = find_re(data, '/en/subtitles/(.*?)/')
     return opensubtitleId
 
-def downloadSubtitleById(opensubtitle_id):
+def download_subtitle(opensubtitle_id):
     srts = {}
     data = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
     reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
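The two subtitle helpers keep their behavior under the new names: find_subtitles returns an opensubtitles id (or an empty value) for an imdb id, and download_subtitle fetches the subtitle files for it. A hedged end-to-end sketch (module path assumed):

    from ox.web import opensubtitles

    opensubtitle_id = opensubtitles.find_subtitles('0133093', language='eng')
    if opensubtitle_id:
        srts = opensubtitles.download_subtitle(opensubtitle_id)  # dict of filename -> srt data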

View file

@@ -2,7 +2,7 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import ox.cache
 
-def getPosterUrl(id):
+def get_poster_url(id):
     url = "http://0xdb.org/%s/poster.0xdb.jpg" % id
     if ox.cache.exists(url):
         return url

View file

@@ -3,7 +3,7 @@
 import re
 from ox.net import read_url
 
-def getPosterUrl(id):
+def get_poster_url(id):
     url = 'http://piratecinema.org/posters/'
     html = read_url(url, unicode=True)
     results = re.compile('src="(.+)" title=".+\((\d{7})\)"').findall(html)
@@ -13,5 +13,5 @@ def getPosterUrl(id):
     return ''
 
 if __name__ == '__main__':
-    print getPosterUrl('0749451')
+    print get_poster_url('0749451')

View file

@@ -2,29 +2,30 @@
 # vi:si:et:sw=4:sts=4:ts=4
 import re
 
-from ox.cache import getHeaders, read_url
+from ox.cache import read_url
 from ox import find_re, strip_tags
 
-def getUrlByImdb(imdb):
+def get_url(id=None, imdb=None):
     #this would also wor but does not cache:
     '''
     from urllib2 import urlopen
     u = urlopen(url)
     return u.url
     '''
-    url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
-    data = read_url(url)
-    if "movie_title" in data:
-        movies = re.compile('(/m/.*?/)').findall(data)
-        if movies:
-            return "http://www.rottentomatoes.com" + movies[0]
+    if imdb:
+        url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
+        data = read_url(url)
+        if "movie_title" in data:
+            movies = re.compile('(/m/.*?/)').findall(data)
+            if movies:
+                return "http://www.rottentomatoes.com" + movies[0]
     return None
 
 def get_og(data, key):
     return find_re(data, '<meta property="og:%s".*?content="(.*?)"' % key)
 
-def getData(url):
+def get_data(url):
     data = read_url(url)
     r = {}
     r['title'] = find_re(data, '<h1 class="movie_title">(.*?)</h1>')

View file

@@ -27,7 +27,7 @@ class SiteParser(dict):
     baseUrl = ''
     regex = {}
 
-    def getUrl(self, page):
+    def get_url(self, page):
         return "%s%s" % (self.baseUrl, page)
 
     def read_url(self, url, timeout):
@@ -35,7 +35,7 @@ class SiteParser(dict):
 
     def __init__(self, timeout=-1):
         for key in self.regex:
-            url = self.getUrl(self.regex[key]['page'])
+            url = self.get_url(self.regex[key]['page'])
             data = self.read_url(url, timeout)
             if isinstance(self.regex[key]['re'], basestring):
                 data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)

View file

@@ -9,7 +9,7 @@ from ox.html import decode_html, strip_tags
 import ox.net
 
-def getNews(year, month, day):
+def get_news(year, month, day):
     sections = [
         'politik', 'wirtschaft', 'panorama', 'sport', 'kultur', 'netzwelt',
         'wissenschaft', 'unispiegel', 'schulspiegel', 'reise', 'auto'
@@ -27,7 +27,7 @@ def getNews(year, month, day):
     for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
         dateString = strip_tags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
         try:
-            description = formatString(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
+            description = format_string(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
         except:
             description = ''
         try:
@@ -35,7 +35,7 @@ def getNews(year, month, day):
         except:
             imageUrl = ''
         try:
-            title = formatString(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
+            title = format_string(re.compile('alt=[\'|"](.*?)[\'|"] title=', re.DOTALL).findall(item)[0]).replace(' : ', ': ').replace('::', ':')
         except:
             title = ''
         if dateString[:10] == date and description and imageUrl and title.find(': ') != -1:
@@ -45,12 +45,12 @@ def getNews(year, month, day):
             else:
                 new['date'] = '%s-%s-%s %s:%s' % (dateString[6:10], dateString[3:5], dateString[:2], dateString[12:14], dateString[15:17])
             # fix decode_html
-            # new['description'] = formatString(decode_html(description))
-            new['description'] = formatString(description)
+            # new['description'] = format_string(decode_html(description))
+            new['description'] = format_string(description)
             new['imageUrl'] = imageUrl
-            new['section'] = formatSection(section)
-            new['title'] = formatString(title)
-            new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(formatString(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
+            new['section'] = format_section(section)
+            new['title'] = format_string(title)
+            new['title1'] = new['title'].replace('\xdf', '\xdf\xdf')[:len(format_string(re.compile('<h4>(.*?)</h4>', re.DOTALL).findall(item)[0]))].replace('\xdf\xdf', '\xdf')
             if new['title1'][-1:] == ':':
                 new['title1'] = new['title1'][0:-1]
             new['title2'] = new['title'][len(new['title1']) + 2:]
@@ -67,21 +67,21 @@ def getNews(year, month, day):
     '''
     return news
 
-def splitTitle(title):
+def split_title(title):
     title1 = re.compile('(.*?): ').findall(title)[0]
     title2 = re.compile(': (.*?)$').findall(title)[0]
     return [title1, title2]
 
-def formatString(string):
+def format_string(string):
     string = string.replace('<span class="spOptiBreak"> </span>', '')
     string = string.replace('\n', ' ').replace('  ', ' ').strip()
     string = string.replace('&amp;', '&').replace('&apos;', '\'').replace('&quot;', '"')
     return string
 
-def formatSection(string):
+def format_section(string):
     return string[:1].upper() + string[1:].replace('spiegel', 'SPIEGEL')
 
-def formatSubsection(string):
+def format_subsection(string):
     # SPIEGEL, SPIEGEL special
     subsection = {
         'abi': 'Abi - und dann?',
@@ -98,7 +98,7 @@ def formatSubsection(string):
         return subsection[string].replace(u'\xc3', 'ae')
     return string[:1].upper() + string[1:]
 
-def getIssue(year, week):
+def get_issue(year, week):
     coverUrl = 'http://www.spiegel.de/static/epaper/SP/%d/%d/ROSPANZ%d%03d0001-312.jpg' % (year, week, year, week)
     if not ox.net.exists(coverUrl):
         return None
@@ -122,7 +122,7 @@ def getIssue(year, week):
     return {'pages': pages, 'contents': contents, 'coverUrl': coverUrl, 'pageUrl': pageUrl}
 
-def archiveIssues():
+def archive_issues():
     '''
     this is just an example of an archiving application
     '''
@@ -140,8 +140,8 @@ def archiveIssues():
         else:
             wMax = 53
         for w in range(wMax, 0, -1):
-            print 'getIssue(%d, %d)' % (y, w)
-            issue = getIssue(y, w)
+            print 'get_issue(%d, %d)' % (y, w)
+            issue = get_issue(y, w)
             if issue:
                 dirname = '%s/%d/%02d' % (archivePath, y, w)
                 if not os.path.exists(dirname):
@@ -188,7 +188,7 @@ def archiveIssues():
                 print p['min'], p['sum'] / p['num'], p['max']
 
-def archiveNews():
+def archive_news():
     '''
     this is just an example of an archiving application
     '''
@@ -235,7 +235,7 @@ def archiveNews():
                 f.close()
             filename = filename[:-5] + '.txt'
             if not os.path.exists(filename) or True:
-                data = splitTitle(new['title'])
+                data = split_title(new['title'])
                 data.append(new['description'])
                 data = '\n'.join(data)
                 f = open(filename, 'w')
@@ -256,19 +256,14 @@ def archiveNews():
                 count[string] = {'count': 1, 'string': '%s %s http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (new['date'], new['date'], new['section'].lower(), y, int(datetime(y, m, d).strftime('%j')))}
             else:
                 count[string] = {'count': count[string]['count'] + 1, 'string': '%s %s' % (new['date'], count[string]['string'][17:])}
-            strings = splitTitle(new['title'])
+            strings = split_title(new['title'])
             if strings[0] != new['title1'] or strings[1] != new['title2']:
                 colon.append('%s %s %s: %s' % (new['date'], new['title'], new['title1'], new['title2']))
-    for key in sortDictByKey(count):
+    for key in sorted(count):
         print '%6d %-24s %s' % (count[key]['count'], key, count[key]['string'])
     for value in colon:
         print value
 
-def sortDictByKey(d):
-    keys = d.keys()
-    keys.sort()
-    return keys
-
 if __name__ == '__main__':
     # spiegel = Spiegel(2008, 8)
     # print spiegel.getContents()
@@ -281,12 +276,12 @@ if __name__ == '__main__':
     news = getNews(2008, 2, d)
     for new in news:
         strings = new['url'].split('/')
-        string = formatSection(strings[3])
+        string = format_section(strings[3])
         if len(strings) == 6:
-            string += '/' + formatSubsection(strings[4])
+            string += '/' + format_subsection(strings[4])
         if not string in x:
            x.append(string)
     print x
     '''
-    # archiveIssues()
-    archiveNews()
+    # archive_issues()
+    archive_news()
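Besides the renames, sortDictByKey is dropped entirely: it only existed because Python 2's list.sort() sorts in place and returns None, while the built-in sorted() already returns the keys in order. The replacement is equivalent:

    count = {'b': 2, 'a': 1}
    for key in sorted(count):   # same order sortDictByKey(count) produced
        print key, count[key]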

View file

@@ -22,7 +22,9 @@ def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_
         headers['Cookie'] = 'language=en_EN'
     return cache.read_url(url, data, headers, timeout, unicode=unicode)
 
-def findMovies(query, max_results=10):
+def find_movies(query=None, imdb=None, max_results=10):
+    if imdb:
+        query = "tt" + normalize_imdbid(imdb)
     results = []
     next = ["http://thepiratebay.org/search/%s/0/3/200" % quote(query), ]
     page_count = 1
@@ -47,10 +49,7 @@ def findMovies(query, max_results=10):
             next = re.compile('<a.*?href="(.*?)".*?>.*?next.gif.*?</a>').findall(data)
     return results
 
-def findMovieByImdb(imdb):
-    return findMovies("tt" + normalize_imdbid(imdb))
-
-def getId(piratebayId):
+def get_id(piratebayId):
     if piratebayId.startswith('http://torrents.thepiratebay.org/'):
         piratebayId = piratebayId.split('org/')[1]
     d = find_re(piratebayId, "tor/(\d+)")
@@ -62,10 +61,10 @@ def getId(piratebayId):
     return piratebayId
 
 def exists(piratebayId):
-    piratebayId = getId(piratebayId)
+    piratebayId = get_id(piratebayId)
     return ox.net.exists("http://thepiratebay.org/torrent/%s" % piratebayId)
 
-def getData(piratebayId):
+def get_data(piratebayId):
     _key_map = {
         'spoken language(s)': u'language',
         'texted language(s)': u'subtitle language',
@@ -73,7 +72,7 @@ def getData(piratebayId):
         'leechers': 'leecher',
         'seeders': 'seeder',
     }
-    piratebayId = getId(piratebayId)
+    piratebayId = get_id(piratebayId)
     torrent = dict()
     torrent[u'id'] = piratebayId
     torrent[u'domain'] = 'thepiratebay.org'
@@ -108,7 +107,7 @@ class Thepiratebay(Torrent):
         '4e84415d36ed7b54066160c05a0b0f061898d12b'
     '''
     def __init__(self, piratebayId):
-        self.data = getData(piratebayId)
+        self.data = get_data(piratebayId)
         if not self.data:
             return
         Torrent.__init__(self)

View file

@@ -7,12 +7,12 @@ from ox import strip_tags, find_re
 from ox.cache import read_url
 
-def getEpisodeData(url):
+def get_episode_data(url):
     '''
     prases informatin on tvcom episode pages
     returns dict with title, show, description, score
 
     example:
-        getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
+        get_episode_data('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
     '''
     data = read_url(url, unicode=True)
     r = {}
     r['title'] = strip_tags(find_re(data, '<h1>(.*?)</h1>'))

View file

@@ -8,7 +8,7 @@ from ox.cache import read_url
 from ox import find_string, find_re
 
-def getData(id):
+def get_data(id):
     url = 'http://www.vimeo.com/moogaloop/load/clip:%s' %id
     xml = read_url(url)
     tree = ET.parse(StringIO(xml))

View file

@@ -8,52 +8,45 @@ from ox.cache import read_url
 from ox import find_re, decode_html
 
-def getId(url):
+def get_id(url):
     return url.split("/")[-1]
 
-def getUrl(id):
+def get_url(id=None, imdb=None, allmovie=None):
+    if imdb:
+        query = '"%s"'% imdbId
+        result = find(query)
+        if result:
+            url = result[0][1]
+            data = get_movie_data(url)
+            if 'imdb_id' in data:
+                return url
+        return ""
+    if allmovie:
+        query = '"amg_id = 1:%s"'% allmovie
+        result = find(query)
+        if result:
+            url = result[0][1]
+            return url
+        return ''
     return "http://en.wikipedia.org/wiki/%s" % id
 
-def getMovieId(title, director='', year=''):
+def get_movie_id(title, director='', year=''):
     query = '"%s" film %s %s' % (title, director, year)
     result = find(query, 1)
     if result:
         return result[0][1]
     return ''
 
-def getUrlByImdbId(imdbId):
-    query = '"%s"'% imdbId
-    result = find(query)
-    if result:
-        url = result[0][1]
-        data = getMovieData(url)
-        if 'imdb_id' in data:
-            return url
-    return ""
-
-def getUrlByImdb(imdbId):
-    # deprecated, use getUrlByImdbId()
-    return getUrlByImdbId(imdbId)
-
-def getUrlByAllmovieId(allmovieId):
-    query = '"amg_id = 1:%s"'% allmovieId
-    result = find(query)
-    if result:
-        url = result[0][1]
-        return url
-    return ''
-
-def getWikiData(wikipediaUrl):
-    url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
+def get_wiki_data(wikipedia_url):
+    url = wikipedia_url.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
     url = "%s&action=raw" % url
     data = read_url(url).decode('utf-8')
     return data
 
-def getMovieData(wikipediaUrl):
-    if not wikipediaUrl.startswith('http'):
-        wikipediaUrl = getUrl(wikipediaUrl)
-    data = getWikiData(wikipediaUrl)
+def get_movie_data(wikipedia_url):
+    if not wikipedia_url.startswith('http'):
+        wikipedia_url = get_url(wikipedia_url)
+    data = get_wiki_data(wikipedia_url)
     filmbox_data = find_re(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
     filmbox = {}
     _box = filmbox_data.strip().split('|')
@@ -104,7 +97,7 @@ def getMovieData(wikipediaUrl):
     filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
     return filmbox
 
-def getImageUrl(name):
+def get_image_url(name):
     url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20')
     data = read_url(url, unicode=True)
     url = find_re(data, 'href="(http://upload.wikimedia.org/.*?)"')
@@ -114,19 +107,19 @@ def getImageUrl(name):
         url = 'http:' + url
     return url
 
-def getPosterUrl(wikipediaUrl):
-    if not wikipediaUrl.startswith('http'): wikipediaUrl = getUrl(wikipediaUrl)
-    data = getMovieData(wikipediaUrl)
+def get_poster_url(wikipedia_url):
+    if not wikipedia_url.startswith('http'): wikipedia_url = get_url(wikipedia_url)
+    data = get_movie_data(wikipedia_url)
     if 'image' in data:
-        return getImageUrl(data['image'])
+        return get_image_url(data['image'])
     return ''
 
-def getMoviePoster(wikipediaUrl):
-    # deprecated, use getPosterUrl()
-    return getPosterUrl(wikipediaUrl)
+def get_movie_poster(wikipedia_url):
+    # deprecated, use get_poster_url()
+    return get_poster_url(wikipedia_url)
 
-def getAllmovieId(wikipediaUrl):
-    data = getMovieData(wikipediaUrl)
+def get_allmovie_id(wikipedia_url):
+    data = get_movie_data(wikipedia_url)
     return data.get('amg_id', '')
 
 def find(query, max_results=10):

View file

@@ -8,7 +8,7 @@ import feedparser
 from ox.cache import read_url, cache_timeout
 
-def getVideoUrl(youtubeId, format='mp4', timeout=cache_timeout):
+def video_url(youtubeId, format='mp4', timeout=cache_timeout):
     """
         youtubeId - if of video
         format - video format, options: webm, 1080p, 720p, mp4, high