replace all CamelCase with under_score in ox

2012-08-14 16:12:43 +02:00 · 2012-08-14 16:12:43 +02:00 · bb35daa95c
commit bb35daa95c
parent 2de989e188
31 changed files with 242 additions and 244 deletions
--- a/ox/web/allmovie.py
+++ b/ox/web/allmovie.py
@ -3,7 +3,7 @@
 import re
 import time

-from ox import strip_tags, findRe
+from ox import strip_tags, find_re
 from ox.cache import read_url


@ -28,22 +28,22 @@ def getData(id):
    }
    html = read_url(data["url"], unicode=True)
    data['aka'] = parseList(html, 'AKA')
-    data['category'] = findRe(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
+    data['category'] = find_re(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
    data['countries'] = parseList(html, 'countries')
    data['director'] = parseEntry(html, 'directed by')
    data['genres'] = parseList(html, 'genres')
    data['keywords'] = parseList(html, 'keywords')
-    data['posters'] = [findRe(html, '<img src="(http://cps-.*?)"')]
+    data['posters'] = [find_re(html, '<img src="(http://cps-.*?)"')]
    data['produced'] = parseList(html, 'produced by')
-    data['rating'] = findRe(html, 'Stars" title="(.*?) Stars"')
+    data['rating'] = find_re(html, 'Stars" title="(.*?) Stars"')
    data['released'] = parseEntry(html, 'released by')
    data['releasedate'] = parseList(html, 'release date')
    data['runtime'] = parseEntry(html, 'run time').replace('min.', '').strip()
    data['set'] = parseEntry(html, 'set in')
-    data['synopsis'] = strip_tags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
+    data['synopsis'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
    data['themes'] = parseList(html, 'themes')
    data['types'] = parseList(html, 'types')
-    data['year'] = findRe(html, '<span class="year">.*?(\d+)')
+    data['year'] = find_re(html, '<span class="year">.*?(\d+)')
    #data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
    data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
    #html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
@ -51,18 +51,18 @@ def getData(id):
    #html = read_url("http://allmovie.com/work/%s/credits" % id, unicode=True)
    #data['credits'] = parseTable(html)
    html = read_url("http://allmovie.com/work/%s/review" % id, unicode=True)
-    data['review'] = strip_tags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
+    data['review'] = strip_tags(find_re(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
    return data

 def getUrl(id):
    return "http://allmovie.com/work/%s" % id

 def parseEntry(html, title):
-    html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
+    html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
    return strip_tags(html).strip()

 def parseList(html, title):
-    html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
+    html = find_re(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
    r = map(lambda x: strip_tags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html))
    if not r and html:
        r = [strip_tags(html)]
@ -74,11 +74,11 @@ def parseTable(html):
            lambda x: strip_tags(x).strip().replace('&nbsp;', ''),
            x.split('<td width="305">-')
        ),
-        findRe(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
+        find_re(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
    )

 def parseText(html, title):
-    return strip_tags(findRe(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
+    return strip_tags(find_re(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()

 if __name__ == '__main__':
    print getData('129689')
--- a/ox/web/amazon.py
+++ b/ox/web/amazon.py
@ -3,7 +3,7 @@
 import re
 from urllib import quote

-from ox import findRe, strip_tags, decodeHtml
+from ox import find_re, strip_tags, decode_html
 from ox.cache import read_url


@ -12,7 +12,7 @@ def findISBN(title, author):
    url = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Dstripbooks&field-keywords=" + "%s&x=0&y=0" % quote(q)
    data = read_url(url, unicode=True)
    links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
-    id = findRe(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/')
+    id = find_re(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/')
    data = getData(id)
    if author in data['authors']:
        return data
@ -24,13 +24,13 @@ def getData(id):


    def findData(key):
-        return findRe(data, '<li><b>%s:</b>(.*?)</li>'% key).strip()
+        return find_re(data, '<li><b>%s:</b>(.*?)</li>'% key).strip()

    r = {}
    r['amazon'] = url
-    r['title'] = findRe(data, '<span id="btAsinTitle" style="">(.*?)<span')
+    r['title'] = find_re(data, '<span id="btAsinTitle" style="">(.*?)<span')
    r['authors'] = re.compile('<b class="h3color">(.*?)</b>.*?\(Author\)', re.DOTALL).findall(data)
-    r['authors'] = filter(lambda x: len(x)>1, [decodeHtml(a) for a in r['authors']])
+    r['authors'] = filter(lambda x: len(x)>1, [decode_html(a) for a in r['authors']])
    t = re.compile('>(.*?)</a> \(Translator\)').findall(data)
    if t:
        r['translator'] = t
@ -38,15 +38,15 @@ def getData(id):
    r['language'] = findData('Language')
    r['isbn-10'] = findData('ISBN-10')
    r['isbn-13'] = findData('ISBN-13').replace('-', '')
-    r['dimensions'] = findRe(data, '<li><b>.*?Product Dimensions:.*?</b>(.*?)</li>')
+    r['dimensions'] = find_re(data, '<li><b>.*?Product Dimensions:.*?</b>(.*?)</li>')

    r['pages'] = findData('Paperback')
    if not r['pages']:
        r['pages'] = findData('Hardcover')

-    r['review'] = strip_tags(findRe(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()
+    r['review'] = strip_tags(find_re(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()

-    r['description'] = strip_tags(findRe(data, '<h3 class="productDescriptionSource">Product Description</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()
+    r['description'] = strip_tags(find_re(data, '<h3 class="productDescriptionSource">Product Description</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()

    r['cover'] = re.findall('src="(.*?)" id="prodImage"', data)
    if r['cover']:
--- a/ox/web/criterion.py
+++ b/ox/web/criterion.py
@ -5,7 +5,7 @@ import re
 import ox.cache
 from ox.cache import read_url
 from ox.html import strip_tags
-from ox.text import findRe, removeSpecialCharacters
+from ox.text import find_re, remove_special_characters

 import imdb

@ -33,40 +33,40 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
        html = read_url(data["url"], timeout=timeout, unicode=True)
    except:
        html = ox.cache.read_url(data["url"], timeout=timeout)
-    data["number"] = findRe(html, "<li>Spine #(\d+)")
+    data["number"] = find_re(html, "<li>Spine #(\d+)")

-    data["title"] = findRe(html, "<meta property=['\"]og:title['\"] content=['\"](.*?)['\"]")
+    data["title"] = find_re(html, "<meta property=['\"]og:title['\"] content=['\"](.*?)['\"]")
    data["title"] = data["title"].split(u' \u2014 The Television Version')[0]
-    data["director"] = strip_tags(findRe(html, "<h2 class=\"director\">(.*?)</h2>"))
-    results = findRe(html, '<div class="left_column">(.*?)</div>')
+    data["director"] = strip_tags(find_re(html, "<h2 class=\"director\">(.*?)</h2>"))
+    results = find_re(html, '<div class="left_column">(.*?)</div>')
    results = re.compile("<li>(.*?)</li>").findall(results)
    data["country"] = results[0]
    data["year"] = results[1]
-    data["synopsis"] = strip_tags(findRe(html, "<p><strong>SYNOPSIS:</strong> (.*?)</p>"))
+    data["synopsis"] = strip_tags(find_re(html, "<p><strong>SYNOPSIS:</strong> (.*?)</p>"))

-    result = findRe(html, "<div class=\"purchase\">(.*?)</div>")
+    result = find_re(html, "<div class=\"purchase\">(.*?)</div>")
    if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
        r = re.compile('<h3 class="section_title first">Other Editions</h3>(.*?)</div>', re.DOTALL).findall(html)
        if r:
            result = r[0]
-    result = findRe(result, "<a href=\"(.*?)\"")
+    result = find_re(result, "<a href=\"(.*?)\"")
    if not "/boxsets/" in result:
        data["posters"] = [result]
    else:
        html_ = read_url(result, unicode=True)
-        result = findRe(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
-        result = findRe(result, "src=\"(.*?)\"")
+        result = find_re(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
+        result = find_re(result, "src=\"(.*?)\"")
        if result:
            data["posters"] = [result.replace("_w100", "")]
        else:
            data["posters"] = []
-    result = findRe(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
+    result = find_re(html, "<img alt=\"Film Still\" height=\"252\" src=\"(.*?)\"")
    if result:
        data["stills"] = [result]
        data["trailers"] = []
    else:
-        data["stills"] = filter(lambda x: x, [findRe(html, "\"thumbnailURL\", \"(.*?)\"")])
-        data["trailers"] = filter(lambda x: x, [findRe(html, "\"videoURL\", \"(.*?)\"")])
+        data["stills"] = filter(lambda x: x, [find_re(html, "\"thumbnailURL\", \"(.*?)\"")])
+        data["trailers"] = filter(lambda x: x, [find_re(html, "\"videoURL\", \"(.*?)\"")])

    if timeout == ox.cache.cache_timeout:
        timeout = -1
--- a/ox/web/duckduckgo.py
+++ b/ox/web/duckduckgo.py
@ -3,7 +3,7 @@
 import re
 import urllib
 import ox
-from ox import strip_tags, decodeHtml
+from ox import strip_tags, decode_html
 from ox.utils import json
 from ox.cache import read_url

@ -17,6 +17,6 @@ def find(query, timeout=ox.cache.cache_timeout):
    results = []
    regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>'
    for r in re.compile(regex, re.DOTALL).findall(data):
-        results.append((strip_tags(decodeHtml(r[1])), r[0], strip_tags(decodeHtml(r[2]))))
+        results.append((strip_tags(decode_html(r[1])), r[0], strip_tags(decode_html(r[2]))))
    return results
    
--- a/ox/web/epguides.py
+++ b/ox/web/epguides.py
@ -3,7 +3,7 @@
 import re
 import time

-from ox import strip_tags, findRe
+from ox import strip_tags, find_re
 from ox.cache import read_url

 import google
@ -23,8 +23,8 @@ def getShowUrl(title):
 def getShowData(url):
    data = read_url(url, unicode=True)
    r = {}
-    r['title'] = strip_tags(findRe(data, '<h1>(.*?)</h1>'))
-    r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
+    r['title'] = strip_tags(find_re(data, '<h1>(.*?)</h1>'))
+    r['imdb'] = find_re(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
    r['episodes'] = {}
    #1.   1- 1       1001      7 Aug 05   You Can't Miss the Bear
    for episode in re.compile('(\d+?)\..*?(\d+?-.\d.*?) .*?(\d+?) .*?(.*?) <a target="_blank" href="(.*?)">(.*?)</a>').findall(data):
--- a/ox/web/flixter.py
+++ b/ox/web/flixter.py
@ -5,7 +5,7 @@ import re
 from lxml.html import document_fromstring

 from ox.cache import read_url
-from ox import findRe, strip_tags
+from ox import find_re, strip_tags
 from ox.web.imdb import ImdbCombined


--- a/ox/web/freebase.py
+++ b/ox/web/freebase.py
@ -3,7 +3,7 @@
 import json

 from ox.cache import read_url
-from ox import findRe
+from ox import find_re

 class Imdb(dict):
    def __init__(self, id, timeout=-1):
@ -36,7 +36,7 @@ class Imdb(dict):

        if 'nytimes' in self:
            self['nytimes'] = self['nytimes'].replace('_/overview', '%s/overview' % self['name'].replace(' ', '-'))
-            self['amgId'] = findRe(self['nytimes'], 'movie/(\d+)/')
+            self['amgId'] = find_re(self['nytimes'], 'movie/(\d+)/')



--- a/ox/web/google.py
+++ b/ox/web/google.py
@ -4,7 +4,7 @@ import re
 import urllib

 import ox
-from ox import strip_tags, decodeHtml
+from ox import strip_tags, decode_html

 DEFAULT_MAX_RESULTS = 10
 DEFAULT_TIMEOUT = 24*60*60
@ -34,7 +34,7 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
    for a in re.compile(
        '<a href="(\S+?)" class=l .*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>'
    ).findall(data):
-        results.append((strip_tags(decodeHtml(a[1])), a[0], strip_tags(decodeHtml(a[2]))))
+        results.append((strip_tags(decode_html(a[1])), a[0], strip_tags(decode_html(a[2]))))
        if len(results) >= max_results:
            break
    return results
--- a/ox/web/imdb.py
+++ b/ox/web/imdb.py
@ -8,8 +8,8 @@ import time
 import unicodedata

 import ox
-from ox import findRe, strip_tags
-from ox.normalize import normalizeTitle, normalizeImdbId
+from ox import find_re, strip_tags
+from ox.normalize import normalize_title, normalize_imdbid
 import ox.cache

 from siteparser import SiteParser
@ -50,7 +50,7 @@ class Imdb(SiteParser):
            'page': 'business',
            're': [
                '<h5>Budget</h5>\s*?\$(.*?)<br',
-                lambda data: findRe(ox.decodeHtml(data).replace(',', ''), '\d+')
+                lambda data: find_re(ox.decode_html(data).replace(',', ''), '\d+')
            ],
            'type': 'int'
        },
@ -141,7 +141,7 @@ class Imdb(SiteParser):
            'page': 'business',
            're': [
                '<h5>Gross</h5>\s*?\$(.*?)<br',
-                lambda data: findRe(data.replace(',', ''), '\d+')
+                lambda data: find_re(data.replace(',', ''), '\d+')
            ],
            'type': 'int'
        },
@ -314,7 +314,7 @@ class Imdb(SiteParser):
        if 'runtime' in self and self['runtime']:
            if 'min' in self['runtime']: base=60
            else: base=1
-            self['runtime'] = int(findRe(self['runtime'], '([0-9]+)')) * base
+            self['runtime'] = int(find_re(self['runtime'], '([0-9]+)')) * base
        if 'runtime' in self and not self['runtime']:
            del self['runtime']
        if 'votes' in self: self['votes'] = self['votes'].replace(',', '')
@ -551,7 +551,7 @@ def getMovieId(title, director='', year='', timeout=-1):
    #print google_query
    results = google.find(google_query, timeout=timeout)
    if results:
-        return findRe(results[0][1], 'title/tt(\d{7})')
+        return find_re(results[0][1], 'title/tt(\d{7})')
    #or nothing
    return ''

@ -567,7 +567,7 @@ def getMoviePoster(imdbId):
    if 'posterId' in info:
        url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId)
        data = read_url(url)
-        poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
+        poster = find_re(data, 'img id="primary-img".*?src="(.*?)"')
        return poster
    elif 'series' in info:
        return getMoviePoster(info['series'])
--- a/ox/web/impawards.py
+++ b/ox/web/impawards.py
@ -4,7 +4,7 @@ import re

 from ox.cache import read_url
 from ox.html import strip_tags
-from ox.text import findRe
+from ox.text import find_re


 def getData(id):
@ -22,13 +22,13 @@ def getData(id):
        'url': getUrl(id)
    }
    html = read_url(data['url'], unicode=True)
-    data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
+    data['imdbId'] = find_re(html, 'imdb.com/title/tt(\d{7})')
    if not data['imdbId']:
        data['imdbId'] = _id_map.get(id, '')
-    data['title'] = strip_tags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
-    data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
+    data['title'] = strip_tags(find_re(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
+    data['year'] = find_re(html, '\(<a href="alpha1.html">(.*?)</a>\)')
    data['posters'] = []
-    poster = findRe(html, '<img src="(posters.*?)"')
+    poster = find_re(html, '<img src="(posters.*?)"')
    if poster:
        poster = 'http://www.impawards.com/%s/%s' % (data['year'], poster)
        data['posters'].append(poster)
@ -37,13 +37,13 @@ def getData(id):
        result = result.replace('_xlg.html', '.html')
        url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
        html = read_url(url, unicode=True)
-        result = findRe(html, '<a href = (\w*?_xlg.html)')
+        result = find_re(html, '<a href = (\w*?_xlg.html)')
        if result:
            url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
            html = read_url(url, unicode=True)
-            poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"'))
+            poster = 'http://www.impawards.com/%s/%s' % (data['year'], find_re(html, '<img SRC="(.*?)"'))
        else:
-            poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)"'))
+            poster = 'http://www.impawards.com/%s/%s' % (data['year'], find_re(html, '<img src="(posters.*?)"'))
        data['posters'].append(poster)

    return data
@ -54,7 +54,7 @@ def getId(url):
    split = split[4][:-5].split('_')
    if split[-1] == 'xlg':
        split.pop()
-    if findRe(split[-1], 'ver\d+$'):
+    if find_re(split[-1], 'ver\d+$'):
        split.pop()
    id = '%s/%s' % (year, '_'.join(split))
    return id
@ -62,7 +62,7 @@ def getId(url):
 def getIds():
    ids = []
    html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
-    pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
+    pages = int(find_re(html, '<a href= page(.*?).html>')) + 1
    for page in range(pages, 0, -1):
        for id in getIdsByPage(page):
            if not id in ids:
@ -81,7 +81,7 @@ def getIdsByPage(page):
 def getUrl(id):
    url = u"http://www.impawards.com/%s.html" % id
    html = read_url(url, unicode=True)
-    if findRe(html, "No Movie Posters on This Page"):
+    if find_re(html, "No Movie Posters on This Page"):
        url = u"http://www.impawards.com/%s_ver1.html" % id
    return url

--- a/ox/web/itunes.py
+++ b/ox/web/itunes.py
@ -4,9 +4,9 @@ import re
 import urllib

 from ox.cache import read_url
-from ox.html import decodeHtml, strip_tags
-from ox.text import findRe
-from ox.text import findString
+from ox.html import decode_html, strip_tags
+from ox.text import find_re
+from ox.text import find_string


 # to sniff itunes traffic, use something like
@ -65,26 +65,26 @@ def parseXmlDict(xml):
    strings = xml.split('<key>')
    for string in strings:
        if string.find('</key>') != -1:
-            key = findRe(string, '(.*?)</key>')
-            type = findRe(string, '</key><(.*?)>')
+            key = find_re(string, '(.*?)</key>')
+            type = find_re(string, '</key><(.*?)>')
            if type == 'true/':
                value = True
            else:
-                value = findRe(string, '<%s>(.*?)</%s>' % (type, type))
+                value = find_re(string, '<%s>(.*?)</%s>' % (type, type))
                if type == 'integer':
                  value = int(value)
                elif type == 'string':
-                  value = decodeHtml(value)
+                  value = decode_html(value)
            values[key] = value
    return values

 def parseCast(xml, title):
    list = []
    try:
-        strings = findRe(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
+        strings = find_re(xml, '<SetFontStyle normalStyle="textColor">%s(.*?)</VBoxView>' % title[:-1].upper()).split('</GotoURL>')
        strings.pop()
        for string in strings:
-            list.append(findRe(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
+            list.append(find_re(string, '<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        return list
    except:
        return list
@ -92,12 +92,12 @@ def parseCast(xml, title):
 def parseMovies(xml, title):
    list = []
    try:
-        strings = findRe(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
+        strings = find_re(xml, '<SetFontStyle normalStyle="outlineTitleFontStyle"><b>%s(.*?)</Test>' % title[:-1].upper()).split('</GotoURL>')
        strings.pop()
        for string in strings:
            list.append({
-              'id': findRe(string, 'viewMovie\?id=(.*?)&'),
-              'title': findRe(string, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
+              'id': find_re(string, 'viewMovie\?id=(.*?)&'),
+              'title': find_re(string, '<SetFontStyle normalStyle="outlineTextFontStyle"><b>(.*?)</b></SetFontStyle>')
            })
        return list
    except:
@ -114,24 +114,24 @@ class ItunesAlbum:
    def getId(self):
        url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
        xml = read_url(url, headers = ITUNES_HEADERS)
-        id = findRe(xml, 'viewAlbum\?id=(.*?)&')
+        id = find_re(xml, 'viewAlbum\?id=(.*?)&')
        return id

    def getData(self):
        data = {'id': self.id}
        url = composeUrl('viewAlbum', {'id': self.id})
        xml = read_url(url, None, ITUNES_HEADERS)
-        data['albumName'] = findRe(xml, '<B>(.*?)</B>')
-        data['artistName'] = findRe(xml, '<b>(.*?)</b>')
-        data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
-        data['genre'] = findRe(xml, 'Genre:(.*?)<')
-        data['releaseDate'] = findRe(xml, 'Released(.*?)<')
-        data['review'] = strip_tags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
+        data['albumName'] = find_re(xml, '<B>(.*?)</B>')
+        data['artistName'] = find_re(xml, '<b>(.*?)</b>')
+        data['coverUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
+        data['genre'] = find_re(xml, 'Genre:(.*?)<')
+        data['releaseDate'] = find_re(xml, 'Released(.*?)<')
+        data['review'] = strip_tags(find_re(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
        data['tracks'] = []
-        strings = findRe(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
+        strings = find_re(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
        for string in strings:
          data['tracks'].append(parseXmlDict(string))
-        data['type'] = findRe(xml, '<key>listType</key><string>(.*?)<')
+        data['type'] = find_re(xml, '<key>listType</key><string>(.*?)<')
        return data

 class ItunesMovie:
@ -145,7 +145,7 @@ class ItunesMovie:
    def getId(self):
        url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
        xml = read_url(url, headers = ITUNES_HEADERS)
-        id = findRe(xml, 'viewMovie\?id=(.*?)&')
+        id = find_re(xml, 'viewMovie\?id=(.*?)&')
        return id

    def getData(self):
@ -156,21 +156,21 @@ class ItunesMovie:
        f.write(xml)
        f.close()
        data['actors'] = parseCast(xml, 'actors')
-        string = findRe(xml, 'Average Rating:(.*?)</HBoxView>')
+        string = find_re(xml, 'Average Rating:(.*?)</HBoxView>')
        data['averageRating'] = string.count('rating_star_000033.png') + string.count('&#189;') * 0.5
        data['directors'] = parseCast(xml, 'directors')
-        data['format'] = findRe(xml, 'Format:(.*?)<')
-        data['genre'] = decodeHtml(findRe(xml, 'Genre:(.*?)<'))
-        data['plotSummary'] = decodeHtml(findRe(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
-        data['posterUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
+        data['format'] = find_re(xml, 'Format:(.*?)<')
+        data['genre'] = decode_html(find_re(xml, 'Genre:(.*?)<'))
+        data['plotSummary'] = decode_html(find_re(xml, 'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
+        data['posterUrl'] = find_re(xml, 'reflection="." url="(.*?)"')
        data['producers'] = parseCast(xml, 'producers')
-        data['rated'] = findRe(xml, 'Rated(.*?)<')
+        data['rated'] = find_re(xml, 'Rated(.*?)<')
        data['relatedMovies'] = parseMovies(xml, 'related movies')
-        data['releaseDate'] = findRe(xml, 'Released(.*?)<')
-        data['runTime'] = findRe(xml, 'Run Time:(.*?)<')
+        data['releaseDate'] = find_re(xml, 'Released(.*?)<')
+        data['runTime'] = find_re(xml, 'Run Time:(.*?)<')
        data['screenwriters'] = parseCast(xml, 'screenwriters')
-        data['soundtrackId'] = findRe(xml, 'viewAlbum\?id=(.*?)&')
-        data['trailerUrl'] = findRe(xml, 'autoplay="." url="(.*?)"')
+        data['soundtrackId'] = find_re(xml, 'viewAlbum\?id=(.*?)&')
+        data['trailerUrl'] = find_re(xml, 'autoplay="." url="(.*?)"')
        return data

 if __name__ == '__main__':
--- a/ox/web/lyricsfly.py
+++ b/ox/web/lyricsfly.py
@ -1,20 +1,20 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from ox.cache import read_url
-from ox.html import decodeHtml
-from ox.text import findRe
+from ox.html import decode_html
+from ox.text import find_re


 def getLyrics(title, artist):
    html = read_url('http://lyricsfly.com/api/')
-    key = findRe(html, '<font color=green><b>(.*?)</b></font>')
+    key = find_re(html, '<font color=green><b>(.*?)</b></font>')
    url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
    xml = read_url(url)
-    lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
+    lyrics = find_re(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
    lyrics = lyrics.replace('\n', '').replace('\r', '')
    lyrics = lyrics.replace('[br]', '\n').strip()
    lyrics.replace('\n\n\n', '\n\n')
-    lyrics = decodeHtml(lyrics.replace('&amp;', '&'))
+    lyrics = decode_html(lyrics.replace('&amp;', '&'))
    return lyrics

 if __name__ == '__main__':
--- a/ox/web/metacritic.py
+++ b/ox/web/metacritic.py
@ -5,7 +5,7 @@ from urllib import quote
 from lxml.html import document_fromstring

 from ox.cache import read_url
-from ox import findRe, strip_tags
+from ox import find_re, strip_tags

 def getUrl(id):
    return 'http://www.metacritic.com/movie/%s' % id
@ -16,14 +16,14 @@ def getId(url):
 def getUrlByImdb(imdb):
    url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
    data = read_url(url)
-    metacritic_url = findRe(data, '"(http://www.metacritic.com/movie/.*?)"')
+    metacritic_url = find_re(data, '"(http://www.metacritic.com/movie/.*?)"')
    return metacritic_url or None

 def getMetacriticShowUrl(title):
    title = quote(title)
    url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
    data = read_url(url)
-    return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
+    return find_re(data, '(http://www.metacritic.com/tv/shows/.*?)\?')

 def getData(url):
    data = read_url(url, unicode=True)
--- a/ox/web/mininova.py
+++ b/ox/web/mininova.py
@ -6,8 +6,8 @@ import socket
 from urllib import quote

 from ox.cache import read_url
-from ox import findRe, cache, strip_tags, decodeHtml, getTorrentInfo, int_value, normalizeNewlines
-from ox.normalize import normalizeImdbId
+from ox import find_re, cache, strip_tags, decode_html, getTorrentInfo, int_value, normalize_newlines
+from ox.normalize import normalize_imdbid
 import ox

 from torrent import Torrent
@ -20,7 +20,7 @@ def _parseResultsPage(data, max_results=10):
        torrentDate = row[0]
        torrentExtra = row[1]
        torrentId = row[2]
-        torrentTitle = decodeHtml(row[3]).strip()
+        torrentTitle = decode_html(row[3]).strip()
        torrentLink = "http://www.mininova.org/tor/" + torrentId
        privateTracker = 'priv.gif' in torrentExtra
        if not privateTracker:
@ -38,13 +38,13 @@ def findMovieByImdb(imdbId):
    '''find torrents on mininova for a given imdb id
    '''
    results = []
-    imdbId = normalizeImdbId(imdbId)
+    imdbId = normalize_imdbid(imdbId)
    data = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdbId, unicode=True)
    return _parseResultsPage(data)

 def getId(mininovaId):
    mininovaId = unicode(mininovaId)
-    d = findRe(mininovaId, "/(\d+)")
+    d = find_re(mininovaId, "/(\d+)")
    if d:
        return d
    mininovaId = mininovaId.split('/')
@ -81,14 +81,14 @@ def getData(mininovaId):
    for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
        key = d[0].lower().strip()
        key = _key_map.get(key, key)
-        value = decodeHtml(strip_tags(d[1].strip()))
+        value = decode_html(strip_tags(d[1].strip()))
        torrent[key] = value

-    torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')
-    torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
-    torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
+    torrent[u'title'] = find_re(data, '<title>(.*?):.*?</title>')
+    torrent[u'imdbId'] = find_re(data, 'title/tt(\d{7})')
+    torrent[u'description'] = find_re(data, '<div id="description">(.*?)</div>')
    if torrent['description']:
-        torrent['description'] = normalizeNewlines(decodeHtml(strip_tags(torrent['description']))).strip()
+        torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip()
    t = read_url(torrent[u'torrent_link'])
    torrent[u'torrent_info'] = getTorrentInfo(t)
    return torrent
--- a/ox/web/movieposterdb.py
+++ b/ox/web/movieposterdb.py
@ -4,7 +4,7 @@
 import re

 from ox.cache import read_url
-from ox import findRe
+from ox import find_re

 def getData(id):
    '''
@ -33,7 +33,7 @@ def getPostersByUrl(url, group=True, timeout=-1):
        results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
        for result in results:
            html = read_url(result, timeout=timeout, unicode=True)
-            posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
+            posters.append(find_re(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
    return posters

 def getUrl(id):
--- a/ox/web/opensubtitles.py
+++ b/ox/web/opensubtitles.py
@ -4,7 +4,7 @@ import re

 import feedparser
 from ox.cache import read_url
-from ox import findRe, strip_tags
+from ox import find_re, strip_tags
 from ox import langCode2To3, langTo3Code

 def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
@ -26,7 +26,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
            if opensubtitleId:
                opensubtitleId = opensubtitleId[0]
    else:
-        opensubtitleId = findRe(data, '/en/subtitles/(.*?)/')
+        opensubtitleId = find_re(data, '/en/subtitles/(.*?)/')
    return opensubtitleId

 def downloadSubtitleById(opensubtitle_id):
--- a/ox/web/rottentomatoes.py
+++ b/ox/web/rottentomatoes.py
@ -3,7 +3,7 @@
 import re

 from ox.cache import getHeaders, read_url
-from ox import findRe, strip_tags
+from ox import find_re, strip_tags


 def getUrlByImdb(imdb):
@ -22,16 +22,16 @@ def getUrlByImdb(imdb):
    return None

 def get_og(data, key):
-    return findRe(data, '<meta property="og:%s".*?content="(.*?)"' % key)
+    return find_re(data, '<meta property="og:%s".*?content="(.*?)"' % key)

 def getData(url):
    data = read_url(url)
    r = {}
-    r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
+    r['title'] = find_re(data, '<h1 class="movie_title">(.*?)</h1>')
    if '(' in r['title']:
-        r['year'] = findRe(r['title'], '\((\d*?)\)')
+        r['year'] = find_re(r['title'], '\((\d*?)\)')
        r['title'] = strip_tags(re.sub('\((\d*?)\)', '', r['title'])).strip()
-    r['summary'] = strip_tags(findRe(data, '<p id="movieSynopsis" class="movie_synopsis" itemprop="description">(.*?)</p>')).strip()
+    r['summary'] = strip_tags(find_re(data, '<p id="movieSynopsis" class="movie_synopsis" itemprop="description">(.*?)</p>')).strip()
    r['summary'] = r['summary'].replace('\t', ' ').replace('\n', ' ').replace('  ', ' ').replace('  ', ' ')
    if not r['summary']:
        r['summary'] = get_og(data, 'description')
@ -40,9 +40,9 @@ def getData(url):
    meter = filter(lambda m: m[1].isdigit(), meter)
    if meter:
        r['tomatometer'] = meter[0][1]
-    r['rating'] = findRe(data, 'Average Rating: <span>([\d.]+)/10</span>')
-    r['user_score'] = findRe(data, '<span class="meter popcorn numeric ">(\d+)</span>')
-    r['user_rating'] = findRe(data, 'Average Rating: ([\d.]+)/5')
+    r['rating'] = find_re(data, 'Average Rating: <span>([\d.]+)/10</span>')
+    r['user_score'] = find_re(data, '<span class="meter popcorn numeric ">(\d+)</span>')
+    r['user_rating'] = find_re(data, 'Average Rating: ([\d.]+)/5')
    poster = get_og(data, 'image')
    if poster and not 'poster_default.gif' in poster:
        r['posters'] = [poster]
--- a/ox/web/siteparser.py
+++ b/ox/web/siteparser.py
@ -3,7 +3,7 @@
 import re

 from ..cache import read_url
-from .. import strip_tags, decodeHtml
+from .. import strip_tags, decode_html
 from ..utils import datetime


@ -11,8 +11,8 @@ def cleanup(key, data, data_type):
    if data:
        if isinstance(data[0], basestring):
            #FIXME: some types need strip_tags
-            #data = [strip_tags(decodeHtml(p)).strip() for p in data]
-            data = [decodeHtml(p).strip() for p in data]
+            #data = [strip_tags(decode_html(p)).strip() for p in data]
+            data = [decode_html(p).strip() for p in data]
        elif isinstance(data[0], list) or isinstance(data[0], tuple):
            data = [cleanup(key, p, data_type) for p in data]
        while len(data) == 1 and not isinstance(data, basestring):
--- a/ox/web/spiegel.py
+++ b/ox/web/spiegel.py
@ -5,7 +5,7 @@ import re
 import time

 import ox.cache
-from ox.html import decodeHtml, strip_tags
+from ox.html import decode_html, strip_tags
 import ox.net


@ -44,8 +44,8 @@ def getNews(year, month, day):
                    new['date'] = '%s-%s-%s 00:00' % (dateString[6:10], dateString[3:5], dateString[:2])
                else:
                    new['date'] = '%s-%s-%s %s:%s' % (dateString[6:10], dateString[3:5], dateString[:2], dateString[12:14], dateString[15:17])
-                # fix decodeHtml
-                # new['description'] = formatString(decodeHtml(description))
+                # fix decode_html
+                # new['description'] = formatString(decode_html(description))
                new['description'] = formatString(description)
                new['imageUrl'] = imageUrl
                new['section'] = formatSection(section)
--- a/ox/web/thepiratebay.py
+++ b/ox/web/thepiratebay.py
@ -6,8 +6,8 @@ import socket
 from urllib import quote, urlencode
 from urllib2 import URLError

-from ox import findRe, cache, strip_tags, decodeHtml, getTorrentInfo, normalizeNewlines
-from ox.normalize import normalizeImdbId
+from ox import find_re, cache, strip_tags, decode_html, getTorrentInfo, normalize_newlines
+from ox.normalize import normalize_imdbid
 import ox

 from torrent import Torrent
@ -38,7 +38,7 @@ def findMovies(query, max_results=10):
        for row in  re.compile(regexp, re.DOTALL).findall(data):
            torrentType = row[0]
            torrentLink = "http://thepiratebay.org" + row[1]
-            torrentTitle = decodeHtml(row[2])
+            torrentTitle = decode_html(row[2])
            # 201 = Movies , 202 = Movie DVDR, 205 TV Shows
            if torrentType in ['201']:
                results.append((torrentTitle, torrentLink, ''))
@ -48,15 +48,15 @@ def findMovies(query, max_results=10):
    return results

 def findMovieByImdb(imdb):
-    return findMovies("tt" + normalizeImdbId(imdb))
+    return findMovies("tt" + normalize_imdbid(imdb))

 def getId(piratebayId):
    if piratebayId.startswith('http://torrents.thepiratebay.org/'):
        piratebayId = piratebayId.split('org/')[1]
-    d = findRe(piratebayId, "tor/(\d+)")
+    d = find_re(piratebayId, "tor/(\d+)")
    if d:
        piratebayId = d
-    d = findRe(piratebayId, "torrent/(\d+)")
+    d = find_re(piratebayId, "torrent/(\d+)")
    if d:
        piratebayId = d
    return piratebayId
@ -80,21 +80,21 @@ def getData(piratebayId):
    torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId

    data = read_url(torrent['comment_link'], unicode=True)
-    torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
+    torrent[u'title'] = find_re(data, '<title>(.*?) \(download torrent\) - TPB</title>')
    if not torrent[u'title']:
        return None
-    torrent[u'title'] = decodeHtml(torrent[u'title']).strip()
-    torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
+    torrent[u'title'] = decode_html(torrent[u'title']).strip()
+    torrent[u'imdbId'] = find_re(data, 'title/tt(\d{7})')
    title = quote(torrent['title'].encode('utf-8'))
    torrent[u'torrent_link']="http://torrents.thepiratebay.org/%s/%s.torrent" % (piratebayId, title)
    for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
        key = d[0].lower().strip()
        key = _key_map.get(key, key)
-        value = decodeHtml(strip_tags(d[1].strip()))
+        value = decode_html(strip_tags(d[1].strip()))
        torrent[key] = value
-    torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
+    torrent[u'description'] = find_re(data, '<div class="nfo">(.*?)</div>')
    if torrent[u'description']:
-        torrent['description'] = normalizeNewlines(decodeHtml(strip_tags(torrent['description']))).strip()
+        torrent['description'] = normalize_newlines(decode_html(strip_tags(torrent['description']))).strip()
    t = _read_url(torrent[u'torrent_link'])
    torrent[u'torrent_info'] = getTorrentInfo(t)
    return torrent
--- a/ox/web/tv.py
+++ b/ox/web/tv.py
@ -3,7 +3,7 @@
 import re
 import time

-from ox import strip_tags, findRe
+from ox import strip_tags, find_re
 from ox.cache import read_url


@ -16,11 +16,11 @@ def getEpisodeData(url):
    '''
    data = read_url(url, unicode=True)
    r = {}
-    r['description'] = strip_tags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
-    r['show'] = findRe(data, '<h1>(.*?)</h1>')
-    r['title'] =  findRe(data, '<title>.*?: (.*?) - TV.com  </title>')
+    r['description'] = strip_tags(find_re(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
+    r['show'] = find_re(data, '<h1>(.*?)</h1>')
+    r['title'] =  find_re(data, '<title>.*?: (.*?) - TV.com  </title>')
    #episode score
-    r['episode score'] = findRe(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')
+    r['episode score'] = find_re(data, '<span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18">(.*?)</span>')

    match = re.compile('Episode Number: (\d*?) &nbsp;&nbsp; Season Num: (\d*?) &nbsp;&nbsp; First Aired: (.*?) &nbsp').findall(data) 
    if match:
--- a/ox/web/vimeo.py
+++ b/ox/web/vimeo.py
@ -5,7 +5,7 @@ from StringIO import StringIO
 import xml.etree.ElementTree as ET

 from ox.cache import read_url
-from ox import findString, findRe
+from ox import find_string, find_re


 def getData(id):
--- a/ox/web/wikipedia.py
+++ b/ox/web/wikipedia.py
@ -5,7 +5,7 @@ from urllib import urlencode

 from ox.utils import json
 from ox.cache import read_url
-from ox import findRe, decodeHtml
+from ox import find_re, decode_html


 def getId(url):
@ -54,7 +54,7 @@ def getMovieData(wikipediaUrl):
    if not wikipediaUrl.startswith('http'):
        wikipediaUrl = getUrl(wikipediaUrl)
    data = getWikiData(wikipediaUrl)
-    filmbox_data = findRe(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
+    filmbox_data = find_re(data, '''\{\{[Ii]nfobox.[Ff]ilm(.*?)\n\}\}''')
    filmbox = {}
    _box = filmbox_data.strip().split('|')
    for row in _box:
@ -72,12 +72,12 @@ def getMovieData(wikipediaUrl):
    if 'amg_id' in filmbox and not filmbox['amg_id'].isdigit():
        del filmbox['amg_id']
    if 'Allmovie movie' in data:
-        filmbox['amg_id'] = findRe(data, 'Allmovie movie\|.*?(\d+)')
+        filmbox['amg_id'] = find_re(data, 'Allmovie movie\|.*?(\d+)')
    elif 'Allmovie title' in data:
-        filmbox['amg_id'] = findRe(data, 'Allmovie title\|.*?(\d+)')
+        filmbox['amg_id'] = find_re(data, 'Allmovie title\|.*?(\d+)')

    if 'Official website' in data:
-        filmbox['website'] = findRe(data, 'Official website\|(.*?)}').strip()
+        filmbox['website'] = find_re(data, 'Official website\|(.*?)}').strip()

    r = re.compile('{{IMDb title\|id=(\d{7})', re.IGNORECASE).findall(data)
    if r:
@ -99,17 +99,17 @@ def getMovieData(wikipediaUrl):
    if r:
        filmbox['rottentomatoes_id'] = r[0].replace('id=', '')
    if 'google video' in data:
-        filmbox['google_video_id'] = findRe(data, 'google video\|.*?(\d*?)[\|}]')
+        filmbox['google_video_id'] = find_re(data, 'google video\|.*?(\d*?)[\|}]')
    if 'DEFAULTSORT' in data:
-        filmbox['title_sort'] = findRe(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
+        filmbox['title_sort'] = find_re(data, '''\{\{DEFAULTSORT:(.*?)\}\}''')
    return filmbox

 def getImageUrl(name):
    url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20')
    data = read_url(url, unicode=True)
-    url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"')
+    url = find_re(data, 'href="(http://upload.wikimedia.org/.*?)"')
    if not url:
-        url = findRe(data, 'href="(//upload.wikimedia.org/.*?)"')
+        url = find_re(data, 'href="(//upload.wikimedia.org/.*?)"')
        if url:
            url = 'http:' + url
    return url