net/cache readUrl->read_url / Unicode -> unicode=True

format replace all CamelCase with under_score
j 2012-08-14 15:58:05 +02:00
parent c1d0fc6242
commit 2de989e188
33 changed files with 243 additions and 254 deletions
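
A minimal before/after sketch of the renamed API (the example URL is illustrative; the import path matches the ox.cache hunks below):

    # before this commit (CamelCase helpers, separate unicode variant):
    #     from ox.cache import readUrl, readUrlUnicode
    #     data = readUrl(url)            # raw bytes
    #     html = readUrlUnicode(url)     # decoded text
    # after this commit (under_score name, decoding via a keyword):
    from ox.cache import read_url

    url = 'http://example.com/'
    data = read_url(url)                 # still returns raw bytes by default
    html = read_url(url, unicode=True)   # decoded via detect_encoding(), 'latin-1' fallback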


@ -18,7 +18,7 @@ from utils import json
from .file import makedirs from .file import makedirs
import net import net
from net import DEFAULT_HEADERS, getEncoding from net import DEFAULT_HEADERS, detect_encoding
cache_timeout = 30*24*60*60 # default is 30 days cache_timeout = 30*24*60*60 # default is 30 days
@ -40,7 +40,7 @@ def status(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
>>> status('http://google.com/mysearch') >>> status('http://google.com/mysearch')
404 404
''' '''
headers = getHeaders(url, data, headers) headers = get_headers(url, data, headers)
return int(headers['status']) return int(headers['status'])
def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
@ -55,10 +55,10 @@ def exists(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
return True return True
return False return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout): def get_headers(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout):
url_headers = store.get(url, data, headers, timeout, "headers") url_headers = store.get(url, data, headers, timeout, "headers")
if not url_headers: if not url_headers:
url_headers = net.getHeaders(url, data, headers) url_headers = net.get_headers(url, data, headers)
store.set(url, data, -1, url_headers) store.set(url, data, -1, url_headers)
return url_headers return url_headers
@ -68,7 +68,7 @@ class InvalidResult(Exception):
self.result = result self.result = result
self.headers = headers self.headers = headers
def readUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, valid=None): def read_url(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, valid=None, unicode=False):
''' '''
url - url to load url - url to load
data - possible post data data - possible post data
@ -80,31 +80,29 @@ def readUrl(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, vali
#FIXME: send last-modified / etag from cache and only update if needed #FIXME: send last-modified / etag from cache and only update if needed
if isinstance(url, unicode): if isinstance(url, unicode):
url = url.encode('utf-8') url = url.encode('utf-8')
result = store.get(url, data, headers, timeout) data = store.get(url, data, headers, timeout)
if not result: if not data:
#print "get data", url #print "get data", url
try: try:
url_headers, result = net.readUrl(url, data, headers, returnHeaders=True) url_headers, data = net.read_url(url, data, headers, return_headers=True)
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
e.headers['Status'] = "%s" % e.code e.headers['Status'] = "%s" % e.code
url_headers = dict(e.headers) url_headers = dict(e.headers)
result = e.read() data = e.read()
if url_headers.get('content-encoding', None) == 'gzip': if url_headers.get('content-encoding', None) == 'gzip':
result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if not valid or valid(result, url_headers): if not valid or valid(data, url_headers):
store.set(url, data, result, url_headers) store.set(url, data, data, url_headers)
else: else:
raise InvalidResult(result, url_headers) raise InvalidResult(data, url_headers)
return result if unicode:
encoding = detect_encoding(data)
if not encoding:
encoding = 'latin-1'
data = data.decode(encoding)
return data
def readUrlUnicode(url, data=None, headers=DEFAULT_HEADERS, timeout=cache_timeout, _readUrl=readUrl, valid=None): def save_url(url, filename, overwrite=False):
data = _readUrl(url, data, headers, timeout, valid)
encoding = getEncoding(data)
if not encoding:
encoding = 'latin-1'
return unicode(data, encoding)
def saveUrl(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite: if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
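
A hedged usage sketch for the cache-level read_url above; the valid callback signature and the InvalidResult exception come from the hunk, while the URL and the check itself are made up:

    from ox import cache

    def valid(data, headers):
        # illustrative check: reject responses that do not look like an HTML page
        return '<html' in data.lower()

    try:
        html = cache.read_url('http://example.com/', timeout=60*60, valid=valid, unicode=True)
    except cache.InvalidResult, e:
        html = None   # e.result / e.headers carry the rejected response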


@ -217,15 +217,15 @@ def to36(q):
def from36(q): def from36(q):
return int(q, 36) return int(q, 36)
def intValue(strValue, default=u''): def int_value(strValue, default=u''):
""" """
>>> intValue('abc23') >>> int_value('abc23')
u'23' u'23'
>>> intValue(' abc23') >>> int_value(' abc23')
u'23' u'23'
>>> intValue('ab') >>> int_value('ab')
u'' u''
""" """
try: try:
@ -234,15 +234,15 @@ def intValue(strValue, default=u''):
val = default val = default
return val return val
def floatValue(strValue, default=u''): def float_value(strValue, default=u''):
""" """
>>> floatValue('abc23.4') >>> float_value('abc23.4')
u'23.4' u'23.4'
>>> floatValue(' abc23.4') >>> float_value(' abc23.4')
u'23.4' u'23.4'
>>> floatValue('ab') >>> float_value('ab')
u'' u''
""" """
try: try:
@ -251,46 +251,46 @@ def floatValue(strValue, default=u''):
val = default val = default
return val return val
def formatNumber(number, longName, shortName): def format_number(number, longName, shortName):
""" """
Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB) Return the number in a human-readable format (23 KB, 23.4 MB, 23.42 GB)
>>> formatNumber(123, 'Byte', 'B') >>> format_number(123, 'Byte', 'B')
'123 Bytes' '123 Bytes'
>>> formatNumber(1234, 'Byte', 'B') >>> format_number(1234, 'Byte', 'B')
'1 KB' '1 KB'
>>> formatNumber(1234567, 'Byte', 'B') >>> format_number(1234567, 'Byte', 'B')
'1.2 MB' '1.2 MB'
>>> formatNumber(1234567890, 'Byte', 'B') >>> format_number(1234567890, 'Byte', 'B')
'1.15 GB' '1.15 GB'
>>> formatNumber(1234567890123456789, 'Byte', 'B') >>> format_number(1234567890123456789, 'Byte', 'B')
'1,096.5166 PB' '1,096.5166 PB'
>>> formatNumber(-1234567890123456789, 'Byte', 'B') >>> format_number(-1234567890123456789, 'Byte', 'B')
'-1,096.5166 PB' '-1,096.5166 PB'
""" """
if abs(number) < 1024: if abs(number) < 1024:
return '%s %s%s' % (formatThousands(number), longName, number != 1 and 's' or '') return '%s %s%s' % (format_thousands(number), longName, number != 1 and 's' or '')
prefix = ['K', 'M', 'G', 'T', 'P'] prefix = ['K', 'M', 'G', 'T', 'P']
for i in range(5): for i in range(5):
if abs(number) < math.pow(1024, i + 2) or i == 4: if abs(number) < math.pow(1024, i + 2) or i == 4:
n = number / math.pow(1024, i + 1) n = number / math.pow(1024, i + 1)
return '%s %s%s' % (formatThousands('%.*f' % (i, n)), prefix[i], shortName) return '%s %s%s' % (format_thousands('%.*f' % (i, n)), prefix[i], shortName)
def formatThousands(number, separator = ','): def format_thousands(number, separator = ','):
""" """
Return the number with separators (1,000,000) Return the number with separators (1,000,000)
>>> formatThousands(1) >>> format_thousands(1)
'1' '1'
>>> formatThousands(1000) >>> format_thousands(1000)
'1,000' '1,000'
>>> formatThousands(1000000) >>> format_thousands(1000000)
'1,000,000' '1,000,000'
""" """
string = str(number).split('.') string = str(number).split('.')
@ -302,16 +302,16 @@ def formatThousands(number, separator = ','):
string[0] = ''.join(l) string[0] = ''.join(l)
return '.'.join(string) return '.'.join(string)
def formatBits(number): def format_bits(number):
return formatNumber(number, 'bit', 'b') return format_number(number, 'bit', 'b')
def formatBytes(number): def format_bytes(number):
return formatNumber(number, 'byte', 'B') return format_number(number, 'byte', 'B')
def formatPixels(number): def format_pixels(number):
return formatNumber(number, 'pixel', 'px') return format_number(number, 'pixel', 'px')
def formatCurrency(amount, currency="$"): def format_currency(amount, currency="$"):
if amount: if amount:
temp = "%.2f" % amount temp = "%.2f" % amount
profile=re.compile(r"(\d)(\d\d\d[.,])") profile=re.compile(r"(\d)(\d\d\d[.,])")
@ -336,9 +336,9 @@ def plural(amount, unit, plural='s'):
if plural == 's': if plural == 's':
unit = unit + plural unit = unit + plural
else: unit = plural else: unit = plural
return "%s %s" % (formatThousands(amount), unit) return "%s %s" % (format_thousands(amount), unit)
def formatDuration(ms, verbosity=0, years=True, hours=True, milliseconds=True): def format_duration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
''' '''
verbosity verbosity
0: D:HH:MM:SS 0: D:HH:MM:SS
@ -353,13 +353,13 @@ def formatDuration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
milliseconds milliseconds
True: always display milliseconds True: always display milliseconds
False: never display milliseconds False: never display milliseconds
>>> formatDuration(1000 * 60 * 60 * 24 * 366) >>> format_duration(1000 * 60 * 60 * 24 * 366)
'1:001:00:00:00.000' '1:001:00:00:00.000'
>>> formatDuration(1000 * 60 * 60 * 24 * 366, years=False) >>> format_duration(1000 * 60 * 60 * 24 * 366, years=False)
'366:00:00:00.000' '366:00:00:00.000'
>>> formatDuration(1000 * 60 * 60 * 24 * 365 + 2003, verbosity=2) >>> format_duration(1000 * 60 * 60 * 24 * 365 + 2003, verbosity=2)
'1 year 2 seconds 3 milliseconds' '1 year 2 seconds 3 milliseconds'
>>> formatDuration(1000 * 30, hours=False, milliseconds=False) >>> format_duration(1000 * 30, hours=False, milliseconds=False)
'00:30' '00:30'
''' '''
if not ms and ms != 0: if not ms and ms != 0:
@ -403,7 +403,7 @@ def formatDuration(ms, verbosity=0, years=True, hours=True, milliseconds=True):
return duration return duration
def ms2runtime(ms, shortenLong=False): def ms2runtime(ms, shortenLong=False):
# deprecated - use formatDuration # deprecated - use format_duration
''' '''
>>> ms2runtime(5000) >>> ms2runtime(5000)
'5 seconds' '5 seconds'
@ -415,11 +415,11 @@ def ms2runtime(ms, shortenLong=False):
'13 hours 53 minutes' '13 hours 53 minutes'
''' '''
if shortenLong and ms > 1000 * 60 * 60 * 24 * 464: if shortenLong and ms > 1000 * 60 * 60 * 24 * 464:
return formatDuration(ms, verbosity=1, milliseconds=False) return format_duration(ms, verbosity=1, milliseconds=False)
return formatDuration(ms, verbosity=2, milliseconds=False) return format_duration(ms, verbosity=2, milliseconds=False)
def ms2playtime(ms, hours=False): def ms2playtime(ms, hours=False):
# deprecated - use formatDuration # deprecated - use format_duration
''' '''
>>> ms2playtime(5000) >>> ms2playtime(5000)
'00:05' '00:05'
@ -428,15 +428,15 @@ def ms2playtime(ms, hours=False):
>>> ms2playtime(50000000) >>> ms2playtime(50000000)
'13:53:20' '13:53:20'
''' '''
return formatDuration(ms, hours=False, years=False, milliseconds=False) return format_duration(ms, hours=False, years=False, milliseconds=False)
def ms2time(ms): def ms2time(ms):
# deprecated - use formatDuration # deprecated - use format_duration
''' '''
>>> ms2time(44592123) >>> ms2time(44592123)
'12:23:12.123' '12:23:12.123'
''' '''
return formatDuration(ms, years=False) return format_duration(ms, years=False)
def time2ms(timeString): def time2ms(timeString):
''' '''
@ -451,7 +451,7 @@ def time2ms(timeString):
ms = ms * 60 + float(_p) ms = ms * 60 + float(_p)
return int(ms * 1000) return int(ms * 1000)
def shiftTime(offset, timeString): def shift_time(offset, timeString):
newTime = time2ms(timeString) + offset newTime = time2ms(timeString) + offset
return ms2time(newTime) return ms2time(newTime)
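
The renamed formatting helpers keep their doctest behaviour; a quick sketch using expected values taken from the doctests above (format_duration is used as ox.format_duration elsewhere in this commit, so package-level access to the other helpers is assumed here):

    import ox

    print ox.format_bytes(1234567890)     # '1.15 GB'
    print ox.format_thousands(1000000)    # '1,000,000'
    print ox.format_duration(1000 * 30, hours=False, milliseconds=False)   # '00:30'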


@ -22,7 +22,7 @@ DEFAULT_HEADERS = {
def status(url, data=None, headers=DEFAULT_HEADERS): def status(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = open_url(url, data, headers)
s = f.code s = f.code
except urllib2.HTTPError, e: except urllib2.HTTPError, e:
s = e.code s = e.code
@ -34,9 +34,9 @@ def exists(url, data=None, headers=DEFAULT_HEADERS):
return True return True
return False return False
def getHeaders(url, data=None, headers=DEFAULT_HEADERS): def headers(url, data=None, headers=DEFAULT_HEADERS):
try: try:
f = openUrl(url, data, headers) f = open_url(url, data, headers)
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
headers = f.headers headers = f.headers
f.close() f.close()
@ -45,30 +45,28 @@ def getHeaders(url, data=None, headers=DEFAULT_HEADERS):
headers = e.headers headers = e.headers
return dict(headers) return dict(headers)
def openUrl(url, data=None, headers=DEFAULT_HEADERS): def open_url(url, data=None, headers=DEFAULT_HEADERS):
url = url.replace(' ', '%20') url = url.replace(' ', '%20')
req = urllib2.Request(url, data, headers) req = urllib2.Request(url, data, headers)
return urllib2.urlopen(req) return urllib2.urlopen(req)
def readUrl(url, data=None, headers=DEFAULT_HEADERS, returnHeaders=False): def read_url(url, data=None, headers=DEFAULT_HEADERS, return_headers=False, unicode=False):
f = openUrl(url, data, headers) f = open_url(url, data, headers)
data = f.read() data = f.read()
f.close() f.close()
if f.headers.get('content-encoding', None) == 'gzip': if f.headers.get('content-encoding', None) == 'gzip':
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if returnHeaders: if unicode:
encoding = detect_encoding(data)
if not encoding:
encoding = 'latin-1'
data = data.decode(encoding)
if return_headers:
f.headers['Status'] = "%s" % f.code f.headers['Status'] = "%s" % f.code
return dict(f.headers), data return dict(f.headers), data
return data return data
def readUrlUnicode(url, data=None, headers=DEFAULT_HEADERS): def detect_encoding(data):
data = readUrl(url, data, headers)
encoding = getEncoding(data)
if not encoding:
encoding = 'latin-1'
return unicode(data, encoding)
def getEncoding(data):
if 'content="text/html; charset=utf-8"' in data: if 'content="text/html; charset=utf-8"' in data:
return 'utf-8' return 'utf-8'
elif 'content="text/html; charset=iso-8859-1"' in data: elif 'content="text/html; charset=iso-8859-1"' in data:
@ -81,7 +79,7 @@ def getEncoding(data):
detector.close() detector.close()
return detector.result['encoding'] return detector.result['encoding']
def saveUrl(url, filename, overwrite=False): def save_url(url, filename, overwrite=False):
if not os.path.exists(filename) or overwrite: if not os.path.exists(filename) or overwrite:
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
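
The corresponding low-level helpers in ox.net after the rename, sketched from the signatures in the hunk above; return_headers=True returns the headers dict with the injected status code, unicode=True decodes via detect_encoding() with a 'latin-1' fallback:

    import ox.net

    url = 'http://example.com/'
    headers, body = ox.net.read_url(url, return_headers=True)
    print headers.get('status')                 # HTTP status injected by read_url, as used by the cache module
    text = ox.net.read_url(url, unicode=True)   # decoded text instead of raw bytes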


@ -97,8 +97,8 @@ def encode(data):
for s in data: for s in data:
srt += '%d\r\n%s --> %s\r\n%s\r\n\r\n' % ( srt += '%d\r\n%s --> %s\r\n%s\r\n\r\n' % (
i, i,
ox.formatDuration(s['in']*1000, years=False).replace('.', ','), ox.format_duration(s['in']*1000, years=False).replace('.', ','),
ox.formatDuration(s['out']*1000, years=False).replace('.', ','), ox.format_duration(s['out']*1000, years=False).replace('.', ','),
s['value'].replace('\n', '\r\n').strip() s['value'].replace('\n', '\r\n').strip()
) )
i += 1 i += 1


@ -3,8 +3,8 @@
import re import re
import time import time
from ox import stripTags, findRe from ox import strip_tags, findRe
from ox.cache import readUrlUnicode from ox.cache import read_url
def getId(url): def getId(url):
@ -26,7 +26,7 @@ def getData(id):
data = { data = {
"url": getUrl(id) "url": getUrl(id)
} }
html = readUrlUnicode(data["url"]) html = read_url(data["url"], unicode=True)
data['aka'] = parseList(html, 'AKA') data['aka'] = parseList(html, 'AKA')
data['category'] = findRe(html, '<dt>category</dt>.*?<dd>(.*?)</dd>') data['category'] = findRe(html, '<dt>category</dt>.*?<dd>(.*?)</dd>')
data['countries'] = parseList(html, 'countries') data['countries'] = parseList(html, 'countries')
@ -40,18 +40,18 @@ def getData(id):
data['releasedate'] = parseList(html, 'release date') data['releasedate'] = parseList(html, 'release date')
data['runtime'] = parseEntry(html, 'run time').replace('min.', '').strip() data['runtime'] = parseEntry(html, 'run time').replace('min.', '').strip()
data['set'] = parseEntry(html, 'set in') data['set'] = parseEntry(html, 'set in')
data['synopsis'] = stripTags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip() data['synopsis'] = strip_tags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
data['themes'] = parseList(html, 'themes') data['themes'] = parseList(html, 'themes')
data['types'] = parseList(html, 'types') data['types'] = parseList(html, 'types')
data['year'] = findRe(html, '<span class="year">.*?(\d+)') data['year'] = findRe(html, '<span class="year">.*?(\d+)')
#data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)] #data['stills'] = [re.sub('_derived.*?/', '', i) for i in re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)]
data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html) data['stills'] = re.compile('<a href="#" title="movie still".*?<img src="(.*?)"', re.DOTALL).findall(html)
#html = readUrlUnicode("http://allmovie.com/work/%s/cast" % id) #html = read_url("http://allmovie.com/work/%s/cast" % id, unicode=True)
#data['cast'] = parseTable(html) #data['cast'] = parseTable(html)
#html = readUrlUnicode("http://allmovie.com/work/%s/credits" % id) #html = read_url("http://allmovie.com/work/%s/credits" % id, unicode=True)
#data['credits'] = parseTable(html) #data['credits'] = parseTable(html)
html = readUrlUnicode("http://allmovie.com/work/%s/review" % id) html = read_url("http://allmovie.com/work/%s/review" % id, unicode=True)
data['review'] = stripTags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip() data['review'] = strip_tags(findRe(html, '<div class="toggle-text" itemprop="description">(.*?)</div>')).strip()
return data return data
def getUrl(id): def getUrl(id):
@ -59,26 +59,26 @@ def getUrl(id):
def parseEntry(html, title): def parseEntry(html, title):
html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title) html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title)
return stripTags(html).strip() return strip_tags(html).strip()
def parseList(html, title): def parseList(html, title):
html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower()) html = findRe(html, '<dt>%s</dt>.*?<dd>(.*?)</dd>' % title.lower())
r = map(lambda x: stripTags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html)) r = map(lambda x: strip_tags(x), re.compile('<li>(.*?)</li>', re.DOTALL).findall(html))
if not r and html: if not r and html:
r = [stripTags(html)] r = [strip_tags(html)]
return r return r
def parseTable(html): def parseTable(html):
return map( return map(
lambda x: map( lambda x: map(
lambda x: stripTags(x).strip().replace('&nbsp;', ''), lambda x: strip_tags(x).strip().replace('&nbsp;', ''),
x.split('<td width="305">-') x.split('<td width="305">-')
), ),
findRe(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1] findRe(html, '<div id="results-table">(.*?)</table>').split('</tr>')[:-1]
) )
def parseText(html, title): def parseText(html, title):
return stripTags(findRe(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip() return strip_tags(findRe(html, '%s</td>.*?<td colspan="2"><p>(.*?)</td>' % title)).strip()
if __name__ == '__main__': if __name__ == '__main__':
print getData('129689') print getData('129689')


@ -3,14 +3,14 @@
import re import re
from urllib import quote from urllib import quote
from ox import findRe, stripTags, decodeHtml from ox import findRe, strip_tags, decodeHtml
from ox.cache import readUrlUnicode from ox.cache import read_url
def findISBN(title, author): def findISBN(title, author):
q = '%s %s' % (title, author) q = '%s %s' % (title, author)
url = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Dstripbooks&field-keywords=" + "%s&x=0&y=0" % quote(q) url = "http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Dstripbooks&field-keywords=" + "%s&x=0&y=0" % quote(q)
data = readUrlUnicode(url) data = read_url(url, unicode=True)
links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data) links = re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)
id = findRe(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/') id = findRe(re.compile('href="(http://www.amazon.com/.*?/dp/.*?)"').findall(data)[0], '/dp/(.*?)/')
data = getData(id) data = getData(id)
@ -20,7 +20,7 @@ def findISBN(title, author):
def getData(id): def getData(id):
url = "http://www.amazon.com/title/dp/%s/" % id url = "http://www.amazon.com/title/dp/%s/" % id
data = readUrlUnicode(url) data = read_url(url, unicode=True)
def findData(key): def findData(key):
@ -44,9 +44,9 @@ def getData(id):
if not r['pages']: if not r['pages']:
r['pages'] = findData('Hardcover') r['pages'] = findData('Hardcover')
r['review'] = stripTags(findRe(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip() r['review'] = strip_tags(findRe(data, '<h3 class="productDescriptionSource">Review</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()
r['description'] = stripTags(findRe(data, '<h3 class="productDescriptionSource">Product Description</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip() r['description'] = strip_tags(findRe(data, '<h3 class="productDescriptionSource">Product Description</h3>.*?<div class="productDescriptionWrapper">(.*?)</div>').replace('<br />', '\n')).strip()
r['cover'] = re.findall('src="(.*?)" id="prodImage"', data) r['cover'] = re.findall('src="(.*?)" id="prodImage"', data)
if r['cover']: if r['cover']:


@ -1,7 +1,7 @@
import json import json
import re import re
from ox.cache import readUrlUnicode from ox.cache import read_url
HEADERS = { HEADERS = {
'User-Agent': 'iTunes/10.4 (Macintosh; Intel Mac OS X 10.7) AppleWebKit/534.48.3', 'User-Agent': 'iTunes/10.4 (Macintosh; Intel Mac OS X 10.7) AppleWebKit/534.48.3',
@ -26,21 +26,21 @@ def getMovieData(title, director):
url += '&actorNames=&directorProducerName=' + director url += '&actorNames=&directorProducerName=' + director
url += '&releaseYearTerm=&descriptionTerm=&genreIndex=1&ratingIndex=1' url += '&releaseYearTerm=&descriptionTerm=&genreIndex=1&ratingIndex=1'
HEADERS['Referer'] = url HEADERS['Referer'] = url
html = readUrlUnicode(url, headers=HEADERS) html = read_url(url, headers=HEADERS, unicode=True)
regexp = '<a href="(http://itunes.apple.com/us/movie/.*?)" class="artwork-link"><div class="artwork">' regexp = '<a href="(http://itunes.apple.com/us/movie/.*?)" class="artwork-link"><div class="artwork">'
regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>' regexp += '<img width=".*?" height=".*?" alt=".*?" class="artwork" src="(.*?)" /></div></a>'
results = re.compile(regexp).findall(html) results = re.compile(regexp).findall(html)
if results: if results:
data['link'] = results[0][0] data['link'] = results[0][0]
data['poster'] = results[0][1].replace('140x140', '600x600') data['poster'] = results[0][1].replace('140x140', '600x600')
html = readUrlUnicode(data['link'], headers=HEADERS) html = read_url(data['link'], headers=HEADERS, unicode=True)
results = re.compile('video-preview-url="(.*?)"').findall(html) results = re.compile('video-preview-url="(.*?)"').findall(html)
if results: if results:
data['trailer'] = results[0] data['trailer'] = results[0]
# trailers section (preferred source for poster and trailer) # trailers section (preferred source for poster and trailer)
host = 'http://trailers.apple.com' host = 'http://trailers.apple.com'
url = host + '/trailers/home/scripts/quickfind.php?callback=searchCallback&q=' + title url = host + '/trailers/home/scripts/quickfind.php?callback=searchCallback&q=' + title
js = json.loads(readUrlUnicode(url)[16:-4]) js = json.loads(read_url(url, unicode=True)[16:-4])
results = js['results'] results = js['results']
if results: if results:
url = host + results[0]['location'] url = host + results[0]['location']
@ -49,11 +49,11 @@ def getMovieData(title, director):
headers = { headers = {
'User-Agent': USER_AGENT 'User-Agent': USER_AGENT
} }
html = readUrlUnicode(url, headers=headers) html = read_url(url, headers=headers, unicode=True)
results = re.compile('"(' + host + '.*?poster\.jpg)"').findall(html) results = re.compile('"(' + host + '.*?poster\.jpg)"').findall(html)
if results: if results:
data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg') data['poster'] = results[0].replace('poster.jpg', 'poster-xlarge.jpg')
html = readUrlUnicode(url + 'includes/playlists/web.inc', headers=headers) html = read_url(url + 'includes/playlists/web.inc', headers=headers, unicode=True)
results = re.compile('"(' + host + '\S+\.mov)"').findall(html) results = re.compile('"(' + host + '\S+\.mov)"').findall(html)
if results: if results:
data['trailer'] = results[-1] data['trailer'] = results[-1]


@ -12,7 +12,7 @@ def getUrl(id):
def getData(id): def getData(id):
data = {} data = {}
url = getUrl(id) url = getUrl(id)
details = cache.readUrl('%s?output=json' % url) details = cache.read_url('%s?output=json' % url)
details = json.loads(details) details = json.loads(details)
for key in ('title', 'description', 'runtime'): for key in ('title', 'description', 'runtime'):
data[key] = details['metadata'][key] data[key] = details['metadata'][key]


@ -3,8 +3,8 @@
import re import re
import ox.cache import ox.cache
from ox.cache import readUrlUnicode from ox.cache import read_url
from ox.html import stripTags from ox.html import strip_tags
from ox.text import findRe, removeSpecialCharacters from ox.text import findRe, removeSpecialCharacters
import imdb import imdb
@ -30,19 +30,19 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
"url": getUrl(id) "url": getUrl(id)
} }
try: try:
html = readUrlUnicode(data["url"], timeout=timeout) html = read_url(data["url"], timeout=timeout, unicode=True)
except: except:
html = ox.cache.readUrl(data["url"], timeout=timeout) html = ox.cache.read_url(data["url"], timeout=timeout)
data["number"] = findRe(html, "<li>Spine #(\d+)") data["number"] = findRe(html, "<li>Spine #(\d+)")
data["title"] = findRe(html, "<meta property=['\"]og:title['\"] content=['\"](.*?)['\"]") data["title"] = findRe(html, "<meta property=['\"]og:title['\"] content=['\"](.*?)['\"]")
data["title"] = data["title"].split(u' \u2014 The Television Version')[0] data["title"] = data["title"].split(u' \u2014 The Television Version')[0]
data["director"] = stripTags(findRe(html, "<h2 class=\"director\">(.*?)</h2>")) data["director"] = strip_tags(findRe(html, "<h2 class=\"director\">(.*?)</h2>"))
results = findRe(html, '<div class="left_column">(.*?)</div>') results = findRe(html, '<div class="left_column">(.*?)</div>')
results = re.compile("<li>(.*?)</li>").findall(results) results = re.compile("<li>(.*?)</li>").findall(results)
data["country"] = results[0] data["country"] = results[0]
data["year"] = results[1] data["year"] = results[1]
data["synopsis"] = stripTags(findRe(html, "<p><strong>SYNOPSIS:</strong> (.*?)</p>")) data["synopsis"] = strip_tags(findRe(html, "<p><strong>SYNOPSIS:</strong> (.*?)</p>"))
result = findRe(html, "<div class=\"purchase\">(.*?)</div>") result = findRe(html, "<div class=\"purchase\">(.*?)</div>")
if 'Blu-Ray' in result or 'Essential Art House DVD' in result: if 'Blu-Ray' in result or 'Essential Art House DVD' in result:
@ -53,7 +53,7 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
if not "/boxsets/" in result: if not "/boxsets/" in result:
data["posters"] = [result] data["posters"] = [result]
else: else:
html_ = readUrlUnicode(result) html_ = read_url(result, unicode=True)
result = findRe(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id) result = findRe(html_, '<a href="http://www.criterion.com/films/%s.*?">(.*?)</a>' % id)
result = findRe(result, "src=\"(.*?)\"") result = findRe(result, "src=\"(.*?)\"")
if result: if result:
@ -77,7 +77,7 @@ def getData(id, timeout=ox.cache.cache_timeout, get_imdb=False):
def getIds(): def getIds():
ids = [] ids = []
html = readUrlUnicode("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine") html = read_url("http://www.criterion.com/library/expanded_view?m=dvd&p=1&pp=50&s=spine", unicode=True)
results = re.compile("\&amp;p=(\d+)\&").findall(html) results = re.compile("\&amp;p=(\d+)\&").findall(html)
pages = max(map(int, results)) pages = max(map(int, results))
for page in range(1, pages): for page in range(1, pages):
@ -88,13 +88,13 @@ def getIds():
def getIdsByPage(page): def getIdsByPage(page):
ids = [] ids = []
url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page url = "http://www.criterion.com/library/expanded_view?m=dvd&p=%s&pp=50&s=spine" % page
html = readUrlUnicode(url) html = read_url(url, unicode=True)
results = re.compile("films/(\d+)").findall(html) results = re.compile("films/(\d+)").findall(html)
for result in results: for result in results:
ids.append(result) ids.append(result)
results = re.compile("boxsets/(.*?)\"").findall(html) results = re.compile("boxsets/(.*?)\"").findall(html)
for result in results: for result in results:
html = readUrlUnicode("http://www.criterion.com/boxsets/" + result) html = read_url("http://www.criterion.com/boxsets/" + result, unicode=True)
results = re.compile("films/(\d+)").findall(html) results = re.compile("films/(\d+)").findall(html)
for result in results: for result in results:
ids.append(result) ids.append(result)


@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from urllib import unquote from urllib import unquote
from ox.cache import readUrl from ox.cache import read_url
def getVideoUrl(url): def getVideoUrl(url):
@ -13,7 +13,7 @@ def getVideoUrl(url):
>>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0] >>> getVideoUrl('http://www.dailymotion.com/relevance/search/priere%2Bpour%2Brefuznik/video/x3ou94_priere-pour-refuznik-2-jeanluc-goda_shortfilms').split('?auth')[0]
'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv' 'http://www.dailymotion.com/cdn/FLV-320x240/video/x3ou94_priere-pour-refuznik-2-jean-luc-god_shortfilms.flv'
''' '''
data = readUrl(url) data = read_url(url)
video = re.compile('''video", "(.*?)"''').findall(data) video = re.compile('''video", "(.*?)"''').findall(data)
for v in video: for v in video:
v = unquote(v).split('@@')[0] v = unquote(v).split('@@')[0]


@ -3,9 +3,9 @@
import re import re
import urllib import urllib
import ox import ox
from ox import stripTags, decodeHtml from ox import strip_tags, decodeHtml
from ox.utils import json from ox.utils import json
from ox.cache import readUrlUnicode from ox.cache import read_url
def find(query, timeout=ox.cache.cache_timeout): def find(query, timeout=ox.cache.cache_timeout):
@ -13,10 +13,10 @@ def find(query, timeout=ox.cache.cache_timeout):
query = query.encode('utf-8') query = query.encode('utf-8')
params = urllib.urlencode({'q': query}) params = urllib.urlencode({'q': query})
url = 'http://duckduckgo.com/html/?' + params url = 'http://duckduckgo.com/html/?' + params
data = readUrlUnicode(url, timeout=timeout) data = read_url(url, timeout=timeout, unicode=True)
results = [] results = []
regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>' regex = '<a .*?class="l le" href="(.+?)">(.*?)</a>.*?<div class="cra">(.*?)</div>'
for r in re.compile(regex, re.DOTALL).findall(data): for r in re.compile(regex, re.DOTALL).findall(data):
results.append((stripTags(decodeHtml(r[1])), r[0], stripTags(decodeHtml(r[2])))) results.append((strip_tags(decodeHtml(r[1])), r[0], strip_tags(decodeHtml(r[2]))))
return results return results


@ -3,8 +3,8 @@
import re import re
import time import time
from ox import stripTags, findRe from ox import strip_tags, findRe
from ox.cache import readUrlUnicode from ox.cache import read_url
import google import google
@ -21,9 +21,9 @@ def getShowUrl(title):
return None return None
def getShowData(url): def getShowData(url):
data = readUrlUnicode(url) data = read_url(url, unicode=True)
r = {} r = {}
r['title'] = stripTags(findRe(data, '<h1>(.*?)</h1>')) r['title'] = strip_tags(findRe(data, '<h1>(.*?)</h1>'))
r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>') r['imdb'] = findRe(data, '<h1><a href=".*?/title/tt(\d.*?)">.*?</a></h1>')
r['episodes'] = {} r['episodes'] = {}
#1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear #1. 1- 1 1001 7 Aug 05 You Can't Miss the Bear


@ -4,8 +4,8 @@
import re import re
from lxml.html import document_fromstring from lxml.html import document_fromstring
from ox.cache import readUrlUnicode from ox.cache import read_url
from ox import findRe, stripTags from ox import findRe, strip_tags
from ox.web.imdb import ImdbCombined from ox.web.imdb import ImdbCombined
@ -32,7 +32,7 @@ def getData(id, timeout=-1):
data = { data = {
"url": getUrl(id), "url": getUrl(id),
} }
html = readUrlUnicode(data['url'], timeout=timeout) html = read_url(data['url'], timeout=timeout, unicode=True)
doc = document_fromstring(html) doc = document_fromstring(html)
props = { props = {


@ -2,7 +2,7 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import json import json
from ox.cache import readUrlUnicode from ox.cache import read_url
from ox import findRe from ox import findRe
class Imdb(dict): class Imdb(dict):
@ -12,7 +12,7 @@ class Imdb(dict):
"http://graph.freebase.com/imdb.title.tt%s" % id "http://graph.freebase.com/imdb.title.tt%s" % id
might also be of interest at some point, right now not much info might also be of interest at some point, right now not much info
''' '''
data = readUrlUnicode(url) data = read_url(url, unicode=True)
try: try:
data = json.loads(data) data = json.loads(data)
except ValueError: except ValueError:


@ -4,13 +4,13 @@ import re
import urllib import urllib
import ox import ox
from ox import stripTags, decodeHtml from ox import strip_tags, decodeHtml
DEFAULT_MAX_RESULTS = 10 DEFAULT_MAX_RESULTS = 10
DEFAULT_TIMEOUT = 24*60*60 DEFAULT_TIMEOUT = 24*60*60
def readUrlUnicode(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT): def read_url(url, data=None, headers=ox.net.DEFAULT_HEADERS, timeout=DEFAULT_TIMEOUT):
return ox.cache.readUrlUnicode(url, data, headers, timeout) return ox.cache.read_url(url, data, headers, timeout, unicode=True)
def quote_plus(s): def quote_plus(s):
if not isinstance(s, str): if not isinstance(s, str):
@ -28,13 +28,13 @@ def find(query, max_results=DEFAULT_MAX_RESULTS, timeout=DEFAULT_TIMEOUT):
u'http://www.imdb.com/title/tt0133093/' u'http://www.imdb.com/title/tt0133093/'
""" """
url = 'http://google.com/search?q=%s' % quote_plus(query) url = 'http://google.com/search?q=%s' % quote_plus(query)
data = readUrlUnicode(url, timeout=timeout) data = read_url(url, timeout=timeout)
results = [] results = []
data = re.sub('<span class="f">(.*?)</span>', '\\1', data) data = re.sub('<span class="f">(.*?)</span>', '\\1', data)
for a in re.compile( for a in re.compile(
'<a href="(\S+?)" class=l .*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>' '<a href="(\S+?)" class=l .*?>(.*?)</a>.*?<span class="st">(.*?)<\/span>'
).findall(data): ).findall(data):
results.append((stripTags(decodeHtml(a[1])), a[0], stripTags(decodeHtml(a[2])))) results.append((strip_tags(decodeHtml(a[1])), a[0], strip_tags(decodeHtml(a[2]))))
if len(results) >= max_results: if len(results) >= max_results:
break break
return results return results


@ -8,7 +8,7 @@ import time
import unicodedata import unicodedata
import ox import ox
from ox import findRe, stripTags from ox import findRe, strip_tags
from ox.normalize import normalizeTitle, normalizeImdbId from ox.normalize import normalizeTitle, normalizeImdbId
import ox.cache import ox.cache
@ -16,12 +16,9 @@ from siteparser import SiteParser
import google import google
def readUrl(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None): def read_url(url, data=None, headers=ox.cache.DEFAULT_HEADERS, timeout=ox.cache.cache_timeout, valid=None, unicode=False):
headers = headers.copy() headers = headers.copy()
return ox.cache.readUrl(url, data, headers, timeout) return ox.cache.read_url(url, data, headers, timeout, unicode=unicode)
def readUrlUnicode(url, timeout=ox.cache.cache_timeout):
return ox.cache.readUrlUnicode(url, _readUrl=readUrl, timeout=timeout)
def getUrl(id): def getUrl(id):
return "http://www.imdb.com/title/tt%s/" % id return "http://www.imdb.com/title/tt%s/" % id
@ -61,7 +58,7 @@ class Imdb(SiteParser):
'page': 'combined', 'page': 'combined',
're': [ 're': [
'<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>', '<td class="nm">.*?>(.*?)</a>.*?<td class="char">(.*?)</td>',
lambda ll: [stripTags(l) for l in ll] lambda ll: [strip_tags(l) for l in ll]
], ],
'type': 'list' 'type': 'list'
}, },
@ -266,8 +263,8 @@ class Imdb(SiteParser):
} }
} }
def readUrlUnicode(self, url, timeout): def read_url(self, url, timeout):
return readUrlUnicode(url, timeout) return read_url(url, timeout, unicode=True)
def __init__(self, id, timeout=-1): def __init__(self, id, timeout=-1):
#use akas.imdb.com to always get original title: #use akas.imdb.com to always get original title:
@ -276,7 +273,7 @@ class Imdb(SiteParser):
super(Imdb, self).__init__(timeout) super(Imdb, self).__init__(timeout)
url = self.baseUrl + 'combined' url = self.baseUrl + 'combined'
page = self.readUrlUnicode(url, timeout=-1) page = self.read_url(url, timeout=-1)
if '<title>IMDb: Page not found</title>' in page \ if '<title>IMDb: Page not found</title>' in page \
or 'The requested URL was not found on our server.' in page: or 'The requested URL was not found on our server.' in page:
return return
@ -460,7 +457,7 @@ def getMovieIdByTitle(title, timeout=-1):
params['q'] = params['q'].encode('utf-8') params['q'] = params['q'].encode('utf-8')
params = urllib.urlencode(params) params = urllib.urlencode(params)
url = "http://akas.imdb.com/find?" + params url = "http://akas.imdb.com/find?" + params
data = readUrlUnicode(url, timeout=timeout) data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page #if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />' r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
results = re.compile(r).findall(data) results = re.compile(r).findall(data)
@ -538,7 +535,7 @@ def getMovieId(title, director='', year='', timeout=-1):
url = "http://akas.imdb.com/find?" + params url = "http://akas.imdb.com/find?" + params
#print url #print url
data = readUrlUnicode(url, timeout=timeout) data = read_url(url, timeout=timeout, unicode=True)
#if search results in redirect, get id of current page #if search results in redirect, get id of current page
r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />' r = '<meta property="og:url" content="http://www.imdb.com/title/tt(\d{7})/" />'
results = re.compile(r).findall(data) results = re.compile(r).findall(data)
@ -569,7 +566,7 @@ def getMoviePoster(imdbId):
info = ImdbCombined(imdbId) info = ImdbCombined(imdbId)
if 'posterId' in info: if 'posterId' in info:
url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId) url = "http://www.imdb.com/rg/action-box-title/primary-photo/media/rm%s/tt%s" % (info['posterId'], imdbId)
data = readUrl(url) data = read_url(url)
poster = findRe(data, 'img id="primary-img".*?src="(.*?)"') poster = findRe(data, 'img id="primary-img".*?src="(.*?)"')
return poster return poster
elif 'series' in info: elif 'series' in info:
@ -578,7 +575,7 @@ def getMoviePoster(imdbId):
def maxVotes(): def maxVotes():
url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc' url = 'http://www.imdb.com/search/title?num_votes=500000,&sort=num_votes,desc'
data = ox.cache.readUrl(url) data = ox.cache.read_url(url)
votes = max([int(v.replace(',', '')) votes = max([int(v.replace(',', ''))
for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)]) for v in re.compile('<td class="sort_col">([\d,]+)</td>').findall(data)])
return votes return votes


@ -2,8 +2,8 @@
# encoding: utf-8 # encoding: utf-8
import re import re
from ox.cache import readUrlUnicode from ox.cache import read_url
from ox.html import stripTags from ox.html import strip_tags
from ox.text import findRe from ox.text import findRe
@ -21,11 +21,11 @@ def getData(id):
data = { data = {
'url': getUrl(id) 'url': getUrl(id)
} }
html = readUrlUnicode(data['url']) html = read_url(data['url'], unicode=True)
data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})') data['imdbId'] = findRe(html, 'imdb.com/title/tt(\d{7})')
if not data['imdbId']: if not data['imdbId']:
data['imdbId'] = _id_map.get(id, '') data['imdbId'] = _id_map.get(id, '')
data['title'] = stripTags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">')) data['title'] = strip_tags(findRe(html, '<p class="name white">(.*?) \(<a href="alpha1.html">'))
data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)') data['year'] = findRe(html, '\(<a href="alpha1.html">(.*?)</a>\)')
data['posters'] = [] data['posters'] = []
poster = findRe(html, '<img src="(posters.*?)"') poster = findRe(html, '<img src="(posters.*?)"')
@ -36,11 +36,11 @@ def getData(id):
for result in results: for result in results:
result = result.replace('_xlg.html', '.html') result = result.replace('_xlg.html', '.html')
url = 'http://www.impawards.com/%s/%s' % (data['year'], result) url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = readUrlUnicode(url) html = read_url(url, unicode=True)
result = findRe(html, '<a href = (\w*?_xlg.html)') result = findRe(html, '<a href = (\w*?_xlg.html)')
if result: if result:
url = 'http://www.impawards.com/%s/%s' % (data['year'], result) url = 'http://www.impawards.com/%s/%s' % (data['year'], result)
html = readUrlUnicode(url) html = read_url(url, unicode=True)
poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"')) poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img SRC="(.*?)"'))
else: else:
poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)"')) poster = 'http://www.impawards.com/%s/%s' % (data['year'], findRe(html, '<img src="(posters.*?)"'))
@ -61,7 +61,7 @@ def getId(url):
def getIds(): def getIds():
ids = [] ids = []
html = readUrlUnicode('http://www.impawards.com/archives/latest.html', timeout = 60*60) html = read_url('http://www.impawards.com/archives/latest.html', timeout = 60*60, unicode=True)
pages = int(findRe(html, '<a href= page(.*?).html>')) + 1 pages = int(findRe(html, '<a href= page(.*?).html>')) + 1
for page in range(pages, 0, -1): for page in range(pages, 0, -1):
for id in getIdsByPage(page): for id in getIdsByPage(page):
@ -71,7 +71,7 @@ def getIds():
def getIdsByPage(page): def getIdsByPage(page):
ids = [] ids = []
html = readUrlUnicode('http://www.impawards.com/archives/page%s.html' % page, timeout = -1) html = read_url('http://www.impawards.com/archives/page%s.html' % page, timeout = -1, unicode=True)
results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html) results = re.compile('<a href = \.\./(.*?)>', re.DOTALL).findall(html)
for result in results: for result in results:
url = 'http://impawards.com/%s' % result url = 'http://impawards.com/%s' % result
@ -80,7 +80,7 @@ def getIdsByPage(page):
def getUrl(id): def getUrl(id):
url = u"http://www.impawards.com/%s.html" % id url = u"http://www.impawards.com/%s.html" % id
html = readUrlUnicode(url) html = read_url(url, unicode=True)
if findRe(html, "No Movie Posters on This Page"): if findRe(html, "No Movie Posters on This Page"):
url = u"http://www.impawards.com/%s_ver1.html" % id url = u"http://www.impawards.com/%s_ver1.html" % id
return url return url


@ -3,8 +3,8 @@
import re import re
import urllib import urllib
from ox.cache import readUrl from ox.cache import read_url
from ox.html import decodeHtml, stripTags from ox.html import decodeHtml, strip_tags
from ox.text import findRe from ox.text import findRe
from ox.text import findString from ox.text import findString
@ -113,20 +113,20 @@ class ItunesAlbum:
def getId(self): def getId(self):
url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist}) url = composeUrl('advancedSearch', {'media': 'music', 'title': self.title, 'artist': self.artist})
xml = readUrl(url, headers = ITUNES_HEADERS) xml = read_url(url, headers = ITUNES_HEADERS)
id = findRe(xml, 'viewAlbum\?id=(.*?)&') id = findRe(xml, 'viewAlbum\?id=(.*?)&')
return id return id
def getData(self): def getData(self):
data = {'id': self.id} data = {'id': self.id}
url = composeUrl('viewAlbum', {'id': self.id}) url = composeUrl('viewAlbum', {'id': self.id})
xml = readUrl(url, None, ITUNES_HEADERS) xml = read_url(url, None, ITUNES_HEADERS)
data['albumName'] = findRe(xml, '<B>(.*?)</B>') data['albumName'] = findRe(xml, '<B>(.*?)</B>')
data['artistName'] = findRe(xml, '<b>(.*?)</b>') data['artistName'] = findRe(xml, '<b>(.*?)</b>')
data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"') data['coverUrl'] = findRe(xml, 'reflection="." url="(.*?)"')
data['genre'] = findRe(xml, 'Genre:(.*?)<') data['genre'] = findRe(xml, 'Genre:(.*?)<')
data['releaseDate'] = findRe(xml, 'Released(.*?)<') data['releaseDate'] = findRe(xml, 'Released(.*?)<')
data['review'] = stripTags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>')) data['review'] = strip_tags(findRe(xml, 'REVIEW</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>'))
data['tracks'] = [] data['tracks'] = []
strings = findRe(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>') strings = findRe(xml, '<key>items</key>.*?<dict>(.*?)$').split('<dict>')
for string in strings: for string in strings:
@ -144,14 +144,14 @@ class ItunesMovie:
def getId(self): def getId(self):
url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director}) url = composeUrl('advancedSearch', {'media': 'movie', 'title': self.title, 'director': self.director})
xml = readUrl(url, headers = ITUNES_HEADERS) xml = read_url(url, headers = ITUNES_HEADERS)
id = findRe(xml, 'viewMovie\?id=(.*?)&') id = findRe(xml, 'viewMovie\?id=(.*?)&')
return id return id
def getData(self): def getData(self):
data = {'id': self.id} data = {'id': self.id}
url = composeUrl('viewMovie', {'id': self.id}) url = composeUrl('viewMovie', {'id': self.id})
xml = readUrl(url, None, ITUNES_HEADERS) xml = read_url(url, None, ITUNES_HEADERS)
f = open('/Users/rolux/Desktop/iTunesData.xml', 'w') f = open('/Users/rolux/Desktop/iTunesData.xml', 'w')
f.write(xml) f.write(xml)
f.close() f.close()


@ -1,15 +1,15 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from ox.cache import readUrl from ox.cache import read_url
from ox.html import decodeHtml from ox.html import decodeHtml
from ox.text import findRe from ox.text import findRe
def getLyrics(title, artist): def getLyrics(title, artist):
html = readUrl('http://lyricsfly.com/api/') html = read_url('http://lyricsfly.com/api/')
key = findRe(html, '<font color=green><b>(.*?)</b></font>') key = findRe(html, '<font color=green><b>(.*?)</b></font>')
url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title) url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
xml = readUrl(url) xml = read_url(url)
lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com') lyrics = findRe(xml, '<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly.com')
lyrics = lyrics.replace('\n', '').replace('\r', '') lyrics = lyrics.replace('\n', '').replace('\r', '')
lyrics = lyrics.replace('[br]', '\n').strip() lyrics = lyrics.replace('[br]', '\n').strip()


@ -4,8 +4,8 @@ import re
from urllib import quote from urllib import quote
from lxml.html import document_fromstring from lxml.html import document_fromstring
from ox.cache import readUrl, readUrlUnicode from ox.cache import read_url
from ox import findRe, stripTags from ox import findRe, strip_tags
def getUrl(id): def getUrl(id):
return 'http://www.metacritic.com/movie/%s' % id return 'http://www.metacritic.com/movie/%s' % id
@ -15,18 +15,18 @@ def getId(url):
def getUrlByImdb(imdb): def getUrlByImdb(imdb):
url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb url = "http://www.imdb.com/title/tt%s/criticreviews" % imdb
data = readUrl(url) data = read_url(url)
metacritic_url = findRe(data, '"(http://www.metacritic.com/movie/.*?)"') metacritic_url = findRe(data, '"(http://www.metacritic.com/movie/.*?)"')
return metacritic_url or None return metacritic_url or None
def getMetacriticShowUrl(title): def getMetacriticShowUrl(title):
title = quote(title) title = quote(title)
url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title url = "http://www.metacritic.com/search/process?ty=6&ts=%s&tfs=tvshow_title&x=0&y=0&sb=0&release_date_s=&release_date_e=&metascore_s=&metascore_e=" % title
data = readUrl(url) data = read_url(url)
return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?') return findRe(data, '(http://www.metacritic.com/tv/shows/.*?)\?')
def getData(url): def getData(url):
data = readUrlUnicode(url) data = read_url(url, unicode=True)
doc = document_fromstring(data) doc = document_fromstring(data)
score = filter(lambda s: s.attrib.get('property') == 'v:average', score = filter(lambda s: s.attrib.get('property') == 'v:average',
doc.xpath('//span[@class="score_value"]')) doc.xpath('//span[@class="score_value"]'))
@ -51,7 +51,7 @@ def getData(url):
'critic': authors[i], 'critic': authors[i],
'url': urls[i], 'url': urls[i],
'source': sources[i], 'source': sources[i],
'quote': stripTags(reviews[i]).strip(), 'quote': strip_tags(reviews[i]).strip(),
'score': scores[i], 'score': scores[i],
}) })


@ -5,8 +5,8 @@ import re
import socket import socket
from urllib import quote from urllib import quote
from ox.cache import readUrl, readUrlUnicode from ox.cache import read_url
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, intValue, normalizeNewlines from ox import findRe, cache, strip_tags, decodeHtml, getTorrentInfo, int_value, normalizeNewlines
from ox.normalize import normalizeImdbId from ox.normalize import normalizeImdbId
import ox import ox
@ -31,7 +31,7 @@ def findMovie(query, max_results=10):
'''search for torrents on mininova '''search for torrents on mininova
''' '''
url = "http://www.mininova.org/search/%s/seeds" % quote(query) url = "http://www.mininova.org/search/%s/seeds" % quote(query)
data = readUrlUnicode(url) data = read_url(url, unicode=True)
return _parseResultsPage(data, max_results) return _parseResultsPage(data, max_results)
def findMovieByImdb(imdbId): def findMovieByImdb(imdbId):
@ -39,7 +39,7 @@ def findMovieByImdb(imdbId):
''' '''
results = [] results = []
imdbId = normalizeImdbId(imdbId) imdbId = normalizeImdbId(imdbId)
data = readUrlUnicode("http://www.mininova.org/imdb/?imdb=%s" % imdbId) data = read_url("http://www.mininova.org/imdb/?imdb=%s" % imdbId, unicode=True)
return _parseResultsPage(data) return _parseResultsPage(data)
def getId(mininovaId): def getId(mininovaId):
@ -55,7 +55,7 @@ def getId(mininovaId):
def exists(mininovaId): def exists(mininovaId):
mininovaId = getId(mininovaId) mininovaId = getId(mininovaId)
data = ox.net.readUrl("http://www.mininova.org/tor/%s" % mininovaId) data = ox.net.read_url("http://www.mininova.org/tor/%s" % mininovaId)
if not data or 'Torrent not found...' in data: if not data or 'Torrent not found...' in data:
return False return False
if 'tracker</a> of this torrent requires registration.' in data: if 'tracker</a> of this torrent requires registration.' in data:
@ -74,22 +74,22 @@ def getData(mininovaId):
torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId torrent[u'torrent_link'] = "http://www.mininova.org/get/%s" % mininovaId
torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId torrent[u'details_link'] = "http://www.mininova.org/det/%s" % mininovaId
data = readUrlUnicode(torrent['comment_link']) + readUrlUnicode(torrent['details_link']) data = read_url(torrent['comment_link'], unicode=True) + read_url(torrent['details_link'], unicode=True)
if '<h1>Torrent not found...</h1>' in data: if '<h1>Torrent not found...</h1>' in data:
return None return None
for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data): for d in re.compile('<p>.<strong>(.*?):</strong>(.*?)</p>', re.DOTALL).findall(data):
key = d[0].lower().strip() key = d[0].lower().strip()
key = _key_map.get(key, key) key = _key_map.get(key, key)
value = decodeHtml(stripTags(d[1].strip())) value = decodeHtml(strip_tags(d[1].strip()))
torrent[key] = value torrent[key] = value
torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>') torrent[u'title'] = findRe(data, '<title>(.*?):.*?</title>')
torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})') torrent[u'imdbId'] = findRe(data, 'title/tt(\d{7})')
torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>') torrent[u'description'] = findRe(data, '<div id="description">(.*?)</div>')
if torrent['description']: if torrent['description']:
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip() torrent['description'] = normalizeNewlines(decodeHtml(strip_tags(torrent['description']))).strip()
t = readUrl(torrent[u'torrent_link']) t = read_url(torrent[u'torrent_link'])
torrent[u'torrent_info'] = getTorrentInfo(t) torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent return torrent
@ -109,13 +109,13 @@ class Mininova(Torrent):
self['seeder'] = -1 self['seeder'] = -1
self['leecher'] = -1 self['leecher'] = -1
if len(ratio) == 2: if len(ratio) == 2:
val = intValue(ratio[0].replace(',','').strip()) val = int_value(ratio[0].replace(',','').strip())
if val: if val:
self['seeder'] = int(val) self['seeder'] = int(val)
val = intValue(ratio[1].replace(',','').strip()) val = int_value(ratio[1].replace(',','').strip())
if val: if val:
self['leecher'] = int(val) self['leecher'] = int(val)
val = intValue(self.data['downloads'].replace(',','').strip()) val = int_value(self.data['downloads'].replace(',','').strip())
if val: if val:
self['downloaded'] = int(val) self['downloaded'] = int(val)
else: else:


@ -3,7 +3,7 @@
import re import re
from ox.cache import readUrlUnicode from ox.cache import read_url
from ox import findRe from ox import findRe
def getData(id): def getData(id):
@ -24,7 +24,7 @@ def getId(url):
def getPostersByUrl(url, group=True, timeout=-1): def getPostersByUrl(url, group=True, timeout=-1):
posters = [] posters = []
html = readUrlUnicode(url, timeout=timeout) html = read_url(url, timeout=timeout, unicode=True)
if url in html: if url in html:
if group: if group:
results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html) results = re.compile('<a href="(http://www.movieposterdb.com/group/.+?)\??">', re.DOTALL).findall(html)
@ -32,7 +32,7 @@ def getPostersByUrl(url, group=True, timeout=-1):
posters += getPostersByUrl(result, False) posters += getPostersByUrl(result, False)
results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html) results = re.compile('<a href="(http://www.movieposterdb.com/poster/.+?)">', re.DOTALL).findall(html)
for result in results: for result in results:
html = readUrlUnicode(result, timeout=timeout) html = read_url(result, timeout=timeout, unicode=True)
posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"')) posters.append(findRe(html, '"(http://www.movieposterdb.com/posters/.+?\.jpg)"'))
return posters return posters


@ -3,8 +3,8 @@
import re import re
import feedparser import feedparser
from ox.cache import readUrl, readUrlUnicode from ox.cache import read_url
from ox import findRe, stripTags from ox import findRe, strip_tags
from ox import langCode2To3, langTo3Code from ox import langCode2To3, langTo3Code
def findSubtitlesByImdb(imdb, parts = 1, language = "eng"): def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
@@ -16,7 +16,7 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
if language: if language:
url += "sublanguageid-%s/" % language url += "sublanguageid-%s/" % language
url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb) url += "subsumcd-%s/subformat-srt/imdbid-%s/rss_2_00" % (parts, imdb)
data = readUrl(url) data = read_url(url)
if "title>opensubtitles.com - search results</title" in data: if "title>opensubtitles.com - search results</title" in data:
fd = feedparser.parse(data) fd = feedparser.parse(data)
opensubtitleId = None opensubtitleId = None
@@ -31,11 +31,11 @@ def findSubtitlesByImdb(imdb, parts = 1, language = "eng"):
def downloadSubtitleById(opensubtitle_id): def downloadSubtitleById(opensubtitle_id):
srts = {} srts = {}
data = readUrl('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id) data = read_url('http://www.opensubtitles.org/en/subtitles/%s' % opensubtitle_id)
reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>' reg_exp = 'href="(/en/download/file/.*?)">(.*?)</a>'
for f in re.compile(reg_exp, re.DOTALL).findall(data): for f in re.compile(reg_exp, re.DOTALL).findall(data):
name = stripTags(f[1]).split('\n')[0] name = strip_tags(f[1]).split('\n')[0]
url = "http://www.opensubtitles.com%s" % f[0] url = "http://www.opensubtitles.com%s" % f[0]
srts[name] = readUrlUnicode(url) srts[name] = read_url(url, unicode=True)
return srts return srts
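A hedged usage sketch for the two opensubtitles helpers above, assuming the module lives at ox.web.opensubtitles and that findSubtitlesByImdb returns the matching opensubtitle id (or None):

    from ox.web.opensubtitles import findSubtitlesByImdb, downloadSubtitleById

    opensubtitle_id = findSubtitlesByImdb('0133093', parts=1, language='eng')
    if opensubtitle_id:
        # maps subtitle file names to decoded srt text
        srts = downloadSubtitleById(opensubtitle_id)
        for name in sorted(srts):
            print name, len(srts[name])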

View file

@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from ox.net import readUrlUnicode from ox.net import read_url
def getPosterUrl(id): def getPosterUrl(id):
url = 'http://piratecinema.org/posters/' url = 'http://piratecinema.org/posters/'
html = readUrlUnicode(url) html = read_url(url, unicode=True)
results = re.compile('src="(.+)" title=".+\((\d{7})\)"').findall(html) results = re.compile('src="(.+)" title=".+\((\d{7})\)"').findall(html)
for result in results: for result in results:
if result[1] == id: if result[1] == id:

View file

@@ -2,8 +2,8 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from ox.cache import getHeaders, readUrl, readUrlUnicode from ox.cache import get_headers, read_url
from ox import findRe, stripTags from ox import findRe, strip_tags
def getUrlByImdb(imdb): def getUrlByImdb(imdb):
@@ -14,7 +14,7 @@ def getUrlByImdb(imdb):
return u.url return u.url
''' '''
url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb url = "http://www.rottentomatoes.com/alias?type=imdbid&s=%s" % imdb
data = readUrl(url) data = read_url(url)
if "movie_title" in data: if "movie_title" in data:
movies = re.compile('(/m/.*?/)').findall(data) movies = re.compile('(/m/.*?/)').findall(data)
if movies: if movies:
@@ -25,13 +25,13 @@ def get_og(data, key):
return findRe(data, '<meta property="og:%s".*?content="(.*?)"' % key) return findRe(data, '<meta property="og:%s".*?content="(.*?)"' % key)
def getData(url): def getData(url):
data = readUrl(url) data = read_url(url)
r = {} r = {}
r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>') r['title'] = findRe(data, '<h1 class="movie_title">(.*?)</h1>')
if '(' in r['title']: if '(' in r['title']:
r['year'] = findRe(r['title'], '\((\d*?)\)') r['year'] = findRe(r['title'], '\((\d*?)\)')
r['title'] = stripTags(re.sub('\((\d*?)\)', '', r['title'])).strip() r['title'] = strip_tags(re.sub('\((\d*?)\)', '', r['title'])).strip()
r['summary'] = stripTags(findRe(data, '<p id="movieSynopsis" class="movie_synopsis" itemprop="description">(.*?)</p>')).strip() r['summary'] = strip_tags(findRe(data, '<p id="movieSynopsis" class="movie_synopsis" itemprop="description">(.*?)</p>')).strip()
r['summary'] = r['summary'].replace('\t', ' ').replace('\n', ' ').replace(' ', ' ').replace(' ', ' ') r['summary'] = r['summary'].replace('\t', ' ').replace('\n', ' ').replace(' ', ' ').replace(' ', ' ')
if not r['summary']: if not r['summary']:
r['summary'] = get_og(data, 'description') r['summary'] = get_og(data, 'description')
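For the rottentomatoes module, a hedged usage sketch (module path assumed; 'title', 'year' and 'summary' are the keys populated by getData above):

    from ox.web.rottentomatoes import getUrlByImdb, getData

    url = getUrlByImdb('0133093')  # resolve an imdb id to a rottentomatoes page
    if url:
        info = getData(url)
        print info['title'], info.get('year', '')
        print info['summary']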

View file

@@ -2,16 +2,16 @@
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
import re import re
from ..cache import readUrlUnicode from ..cache import read_url
from .. import stripTags, decodeHtml from .. import strip_tags, decodeHtml
from ..utils import datetime from ..utils import datetime
def cleanup(key, data, data_type): def cleanup(key, data, data_type):
if data: if data:
if isinstance(data[0], basestring): if isinstance(data[0], basestring):
#FIXME: some types need stripTags #FIXME: some types need strip_tags
#data = [stripTags(decodeHtml(p)).strip() for p in data] #data = [strip_tags(decodeHtml(p)).strip() for p in data]
data = [decodeHtml(p).strip() for p in data] data = [decodeHtml(p).strip() for p in data]
elif isinstance(data[0], list) or isinstance(data[0], tuple): elif isinstance(data[0], list) or isinstance(data[0], tuple):
data = [cleanup(key, p, data_type) for p in data] data = [cleanup(key, p, data_type) for p in data]
@@ -30,13 +30,13 @@ class SiteParser(dict):
def getUrl(self, page): def getUrl(self, page):
return "%s%s" % (self.baseUrl, page) return "%s%s" % (self.baseUrl, page)
def readUrlUnicode(self, url, timeout): def read_url(self, url, timeout):
return readUrlUnicode(url, timeout=timeout) return read_url(url, timeout=timeout, unicode=True)
def __init__(self, timeout=-1): def __init__(self, timeout=-1):
for key in self.regex: for key in self.regex:
url = self.getUrl(self.regex[key]['page']) url = self.getUrl(self.regex[key]['page'])
data = self.readUrlUnicode(url, timeout) data = self.read_url(url, timeout)
if isinstance(self.regex[key]['re'], basestring): if isinstance(self.regex[key]['re'], basestring):
data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data) data = re.compile(self.regex[key]['re'], re.DOTALL).findall(data)
data = cleanup(key, data, self.regex[key]['type']) data = cleanup(key, data, self.regex[key]['type'])
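SiteParser subclasses stay declarative: they provide baseUrl and a regex mapping, and the constructor fetches each page through the renamed read_url method and cleans the matches. A rough sketch, with the module path assumed and the site, pattern and 'type' value invented for illustration (exact cleanup semantics assumed):

    from ox.web.siteparser import SiteParser

    class ExampleSite(SiteParser):
        baseUrl = 'http://www.example.com/'
        regex = {
            'title': {
                'page': 'film/123',
                're': '<h1>(.*?)</h1>',
                'type': 'string',
            },
        }

    # item = ExampleSite(timeout=24*60*60)
    # print item['title']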

View file

@@ -5,7 +5,7 @@ import re
import time import time
import ox.cache import ox.cache
from ox.html import decodeHtml, stripTags from ox.html import decodeHtml, strip_tags
import ox.net import ox.net
@@ -21,11 +21,11 @@ def getNews(year, month, day):
for section in sections: for section in sections:
url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day) url = 'http://www.spiegel.de/%s/0,1518,archiv-%d-%03d,00.html' % (section, year, day)
if date == time.strftime('%d.%m.%Y', time.localtime()): if date == time.strftime('%d.%m.%Y', time.localtime()):
html = ox.net.readUrl(url) html = ox.net.read_url(url)
else: else:
html = ox.cache.readUrl(url) html = ox.cache.read_url(url)
for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html): for item in re.compile('<div class="spTeaserCenterpage(.*?)</p>', re.DOTALL).findall(html):
dateString = stripTags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip() dateString = strip_tags(re.compile('<div class="spDateTime">(.*?)</div>', re.DOTALL).findall(item)[0]).strip()
try: try:
description = formatString(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0]) description = formatString(re.compile('<p>(.*?)<', re.DOTALL).findall(item)[0])
except: except:
@@ -104,12 +104,12 @@ def getIssue(year, week):
return None return None
url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week) url = 'http://service.spiegel.de/digas/servlet/epaper?Q=SP&JG=%d&AG=%d&SE=1&AN=INHALT' % (year, week)
contents = [] contents = []
data = ox.cache.readUrl(url) data = ox.cache.read_url(url)
items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data) items = re.compile('<a.?href="http://service.spiegel.de/digas/servlet/epaper\?Q=SP&JG=".?>(.*?)</a>').findall(data)
for item in items: for item in items:
item = item[1] item = item[1]
page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0]) page = int(re.compile('&amp;SE=(.*?)"').findall(item)[0])
title = stripTags(item).strip() title = strip_tags(item).strip()
contents.append({'title': title, 'page': page}) contents.append({'title': title, 'page': page})
pageUrl = {} pageUrl = {}
pages = page + 2 pages = page + 2
@@ -163,7 +163,7 @@ def archiveIssues():
f.close() f.close()
filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w) filename = '%s/Der Spiegel %d %02d.jpg' % (dirname, y, w)
if not os.path.exists(filename): if not os.path.exists(filename):
data = ox.cache.readUrl(issue['coverUrl']) data = ox.cache.read_url(issue['coverUrl'])
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()
@@ -172,7 +172,7 @@ def archiveIssues():
if url: if url:
filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page) filename = '%s/Der Spiegel %d %02d %03d.jpg' % (dirname, y, w, page)
if not os.path.exists(filename): if not os.path.exists(filename):
data = ox.cache.readUrl(url) data = ox.cache.read_url(url)
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()
@@ -243,7 +243,7 @@ def archiveNews():
f.close() f.close()
filename = dirname + '/' + new['imageUrl'].split('/')[-1] filename = dirname + '/' + new['imageUrl'].split('/')[-1]
if not os.path.exists(filename): if not os.path.exists(filename):
data = ox.cache.readUrl(new['imageUrl']) data = ox.cache.read_url(new['imageUrl'])
f = open(filename, 'w') f = open(filename, 'w')
f.write(data) f.write(data)
f.close() f.close()
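getNews only bypasses the cache for today's pages, which may still change; a condensed sketch of that choice (fetch_page is a hypothetical helper name):

    import time
    import ox.net
    import ox.cache

    def fetch_page(url, date):
        # today's page is fetched live, older pages come from the cache
        if date == time.strftime('%d.%m.%Y', time.localtime()):
            return ox.net.read_url(url)
        return ox.cache.read_url(url)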

View file

@@ -6,8 +6,7 @@ import socket
from urllib import quote, urlencode from urllib import quote, urlencode
from urllib2 import URLError from urllib2 import URLError
from ox.cache import readUrl, readUrlUnicode from ox import findRe, cache, strip_tags, decodeHtml, getTorrentInfo, normalizeNewlines
from ox import findRe, cache, stripTags, decodeHtml, getTorrentInfo, normalizeNewlines
from ox.normalize import normalizeImdbId from ox.normalize import normalizeImdbId
import ox import ox
@ -18,13 +17,10 @@ cache_timeout = 24*60*60 # cache search only for 24 hours
season_episode = re.compile("S..E..", re.IGNORECASE) season_episode = re.compile("S..E..", re.IGNORECASE)
def _readUrl(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None): def read_url(url, data=None, headers=cache.DEFAULT_HEADERS, timeout=cache.cache_timeout, valid=None, unicode=False):
headers = headers.copy() headers = headers.copy()
headers['Cookie'] = 'language=en_EN' headers['Cookie'] = 'language=en_EN'
return cache.readUrl(url, data, headers, timeout) return cache.read_url(url, data, headers, timeout, unicode=unicode)
def _readUrlUnicode(url, timeout=cache.cache_timeout):
return cache.readUrlUnicode(url, _readUrl=_readUrl, timeout=timeout)
def findMovies(query, max_results=10): def findMovies(query, max_results=10):
results = [] results = []
@@ -37,7 +33,7 @@ def findMovies(query, max_results=10):
if not url.startswith('/'): if not url.startswith('/'):
url = "/" + url url = "/" + url
url = "http://thepiratebay.org" + url url = "http://thepiratebay.org" + url
data = _readUrlUnicode(url, timeout=cache_timeout) data = read_url(url, timeout=cache_timeout, unicode=True)
regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>''' regexp = '''<tr.*?<td class="vertTh"><a href="/browse/(.*?)".*?<td><a href="(/torrent/.*?)" class="detLink".*?>(.*?)</a>.*?</tr>'''
for row in re.compile(regexp, re.DOTALL).findall(data): for row in re.compile(regexp, re.DOTALL).findall(data):
torrentType = row[0] torrentType = row[0]
@@ -83,7 +79,7 @@ def getData(piratebayId):
torrent[u'domain'] = 'thepiratebay.org' torrent[u'domain'] = 'thepiratebay.org'
torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId torrent[u'comment_link'] = 'http://thepiratebay.org/torrent/%s' % piratebayId
data = _readUrlUnicode(torrent['comment_link']) data = read_url(torrent['comment_link'], unicode=True)
torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>') torrent[u'title'] = findRe(data, '<title>(.*?) \(download torrent\) - TPB</title>')
if not torrent[u'title']: if not torrent[u'title']:
return None return None
@@ -94,12 +90,12 @@ def getData(piratebayId):
for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data): for d in re.compile('dt>(.*?):</dt>.*?<dd.*?>(.*?)</dd>', re.DOTALL).findall(data):
key = d[0].lower().strip() key = d[0].lower().strip()
key = _key_map.get(key, key) key = _key_map.get(key, key)
value = decodeHtml(stripTags(d[1].strip())) value = decodeHtml(strip_tags(d[1].strip()))
torrent[key] = value torrent[key] = value
torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>') torrent[u'description'] = findRe(data, '<div class="nfo">(.*?)</div>')
if torrent[u'description']: if torrent[u'description']:
torrent['description'] = normalizeNewlines(decodeHtml(stripTags(torrent['description']))).strip() torrent['description'] = normalizeNewlines(decodeHtml(strip_tags(torrent['description']))).strip()
t = _readUrl(torrent[u'torrent_link']) t = read_url(torrent[u'torrent_link'])
torrent[u'torrent_info'] = getTorrentInfo(t) torrent[u'torrent_info'] = getTorrentInfo(t)
return torrent return torrent
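With the unicode flag on cache.read_url, thepiratebay no longer needs two private wrappers; the one surviving wrapper only injects the language cookie. A condensed before/after of the call sites, mirroring the hunks above:

    # before: one wrapper per return type
    data = _readUrlUnicode(url, timeout=cache_timeout)
    t = _readUrl(torrent[u'torrent_link'])

    # after: the single module-level wrapper, unicode on request
    data = read_url(url, timeout=cache_timeout, unicode=True)
    t = read_url(torrent[u'torrent_link'])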

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4 # vi:si:et:sw=4:sts=4:ts=4
from ox import intValue from ox import int_value
class Torrent(dict): class Torrent(dict):
@@ -25,7 +25,7 @@ class Torrent(dict):
for key in self._int_keys: for key in self._int_keys:
value = self.data.get(key, -1) value = self.data.get(key, -1)
if not isinstance(value, int): if not isinstance(value, int):
value = int(intValue(value)) value = int(int_value(value))
self[key] = value self[key] = value
self['infohash'] = self.data['torrent_info'].get('hash', '') self['infohash'] = self.data['torrent_info'].get('hash', '')
self['size'] = self.data['torrent_info'].get('size', -1) self['size'] = self.data['torrent_info'].get('size', -1)

View file

@@ -3,8 +3,8 @@
import re import re
import time import time
from ox import stripTags, findRe from ox import strip_tags, findRe
from ox.cache import readUrlUnicode from ox.cache import read_url
def getEpisodeData(url): def getEpisodeData(url):
@@ -14,9 +14,9 @@ def getEpisodeData(url):
example: example:
getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html') getEpisodeData('http://www.tv.com/lost/do-no-harm/episode/399310/summary.html')
''' '''
data = readUrlUnicode(url) data = read_url(url, unicode=True)
r = {} r = {}
r['description'] = stripTags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0]) r['description'] = strip_tags(findRe(data, 'div id="main-col">.*?<div>(.*?)</div').split('\r')[0])
r['show'] = findRe(data, '<h1>(.*?)</h1>') r['show'] = findRe(data, '<h1>(.*?)</h1>')
r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>') r['title'] = findRe(data, '<title>.*?: (.*?) - TV.com </title>')
#episode score #episode score

View file

@@ -4,13 +4,13 @@ import re
from StringIO import StringIO from StringIO import StringIO
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from ox.cache import readUrl, readUrlUnicode from ox.cache import read_url
from ox import findString, findRe from ox import findString, findRe
def getData(id): def getData(id):
url = 'http://www.vimeo.com/moogaloop/load/clip:%s' %id url = 'http://www.vimeo.com/moogaloop/load/clip:%s' %id
xml = readUrl(url) xml = read_url(url)
tree = ET.parse(StringIO(xml)) tree = ET.parse(StringIO(xml))
request_signature = tree.find('request_signature').text request_signature = tree.find('request_signature').text
request_signature_expires = tree.find('request_signature_expires').text request_signature_expires = tree.find('request_signature_expires').text

View file

@@ -4,7 +4,7 @@ import re
from urllib import urlencode from urllib import urlencode
from ox.utils import json from ox.utils import json
from ox.cache import readUrl, readUrlUnicode from ox.cache import read_url
from ox import findRe, decodeHtml from ox import findRe, decodeHtml
@@ -47,7 +47,7 @@ def getUrlByAllmovieId(allmovieId):
def getWikiData(wikipediaUrl): def getWikiData(wikipediaUrl):
url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=') url = wikipediaUrl.replace('wikipedia.org/wiki/', 'wikipedia.org/w/index.php?title=')
url = "%s&action=raw" % url url = "%s&action=raw" % url
data = readUrl(url).decode('utf-8') data = read_url(url).decode('utf-8')
return data return data
def getMovieData(wikipediaUrl): def getMovieData(wikipediaUrl):
@@ -106,7 +106,7 @@ def getMovieData(wikipediaUrl):
def getImageUrl(name): def getImageUrl(name):
url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20') url = 'http://en.wikipedia.org/wiki/Image:' + name.replace(' ', '%20')
data = readUrlUnicode(url) data = read_url(url, unicode=True)
url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"') url = findRe(data, 'href="(http://upload.wikimedia.org/.*?)"')
if not url: if not url:
url = findRe(data, 'href="(//upload.wikimedia.org/.*?)"') url = findRe(data, 'href="(//upload.wikimedia.org/.*?)"')
@@ -133,9 +133,9 @@ def find(query, max_results=10):
query = {'action': 'query', 'list':'search', 'format': 'json', query = {'action': 'query', 'list':'search', 'format': 'json',
'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')} 'srlimit': max_results, 'srwhat': 'text', 'srsearch': query.encode('utf-8')}
url = "http://en.wikipedia.org/w/api.php?" + urlencode(query) url = "http://en.wikipedia.org/w/api.php?" + urlencode(query)
data = readUrl(url) data = read_url(url)
if not data: if not data:
data = readUrl(url, timeout=0) data = read_url(url, timeout=0)
result = json.loads(data) result = json.loads(data)
results = [] results = []
if result and 'query' in result: if result and 'query' in result:
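find retries an empty cached response with timeout=0 to force a fresh request; a condensed sketch of that cache-busting pattern (api_search is a hypothetical helper name):

    from urllib import urlencode
    from ox.cache import read_url
    from ox.utils import json

    def api_search(query, max_results=10):
        params = {'action': 'query', 'list': 'search', 'format': 'json',
                  'srlimit': max_results, 'srwhat': 'text',
                  'srsearch': query.encode('utf-8')}
        url = 'http://en.wikipedia.org/w/api.php?' + urlencode(params)
        data = read_url(url)
        if not data:
            # an empty cache entry: retry and bypass the cache
            data = read_url(url, timeout=0)
        return json.loads(data) if data else {}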

View file

@@ -5,7 +5,7 @@ import re
from xml.dom.minidom import parseString from xml.dom.minidom import parseString
import feedparser import feedparser
from ox.cache import readUrl, cache_timeout from ox.cache import read_url, cache_timeout
def getVideoUrl(youtubeId, format='mp4', timeout=cache_timeout): def getVideoUrl(youtubeId, format='mp4', timeout=cache_timeout):
@@ -33,7 +33,7 @@ def getVideoUrl(youtubeId, format='mp4', timeout=cache_timeout):
def find(query, max_results=10, offset=1, orderBy='relevance'): def find(query, max_results=10, offset=1, orderBy='relevance'):
query = quote(query) query = quote(query)
url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results) url = "http://gdata.youtube.com/feeds/api/videos?vq=%s&orderby=%s&start-index=%s&max-results=%s" % (query, orderBy, offset, max_results)
data = readUrl(url) data = read_url(url)
fd = feedparser.parse(data) fd = feedparser.parse(data)
videos = [] videos = []
for item in fd.entries: for item in fd.entries:
@@ -48,7 +48,7 @@ def find(query, max_results=10, offset=1, orderBy='relevance'):
def info(id): def info(id):
info = {} info = {}
url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id url = "http://gdata.youtube.com/feeds/api/videos/%s?v=2" % id
data = readUrl(url) data = read_url(url)
xml = parseString(data) xml = parseString(data)
info['url'] = 'http://www.youtube.com/watch?v=%s' % id info['url'] = 'http://www.youtube.com/watch?v=%s' % id
info['title'] = xml.getElementsByTagName('title')[0].firstChild.data info['title'] = xml.getElementsByTagName('title')[0].firstChild.data
@@ -62,21 +62,21 @@ def info(id):
info['keywords'] = xml.getElementsByTagName('media:keywords')[0].firstChild.data.split(', ') info['keywords'] = xml.getElementsByTagName('media:keywords')[0].firstChild.data.split(', ')
url = "http://www.youtube.com/watch?v=%s" % id url = "http://www.youtube.com/watch?v=%s" % id
data = readUrl(url) data = read_url(url)
match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data) match = re.compile('<h4>License:</h4>(.*?)</p>', re.DOTALL).findall(data)
if match: if match:
info['license'] = match[0].strip() info['license'] = match[0].strip()
info['license'] = re.sub('<.+?>', '', info['license']).strip() info['license'] = re.sub('<.+?>', '', info['license']).strip()
url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id url = "http://www.youtube.com/api/timedtext?hl=en&type=list&tlangs=1&v=%s&asrs=1"%id
data = readUrl(url) data = read_url(url)
xml = parseString(data) xml = parseString(data)
languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')] languages = [t.getAttribute('lang_code') for t in xml.getElementsByTagName('track')]
if languages: if languages:
info['subtitles'] = {} info['subtitles'] = {}
for language in languages: for language in languages:
url = "http://www.youtube.com/api/timedtext?hl=en&v=%s&type=track&lang=%s&name&kind"%(id, language) url = "http://www.youtube.com/api/timedtext?hl=en&v=%s&type=track&lang=%s&name&kind"%(id, language)
data = readUrl(url) data = read_url(url)
xml = parseString(data) xml = parseString(data)
subs = [] subs = []
for t in xml.getElementsByTagName('text'): for t in xml.getElementsByTagName('text'):
@@ -101,7 +101,7 @@ def videos(id, format=''):
'mp4': 'video/mp4' 'mp4': 'video/mp4'
}.get(format) }.get(format)
url = "http://www.youtube.com/watch?v=%s" % id url = "http://www.youtube.com/watch?v=%s" % id
data = readUrl(url) data = read_url(url)
match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data) match = re.compile('"url_encoded_fmt_stream_map": "(.*?)"').findall(data)
streams = {} streams = {}
for x in match[0].split(','): for x in match[0].split(','):