python3 migration

This commit is contained in:
parent 89a24dd1d4
commit b6faab1573

7 changed files with 12 additions and 12 deletions
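All twelve changes follow the same Python 3 pattern: the HTTP helpers (read_url / ox.cache.read_url) return bytes, while re and json want str, so every response is now decoded first, either by asking the helper for text (unicode=True) or by an explicit .decode('utf-8'). A minimal standalone sketch of the pattern (fetch() is a hypothetical stand-in, not the library function):

    from urllib.request import urlopen

    def fetch(url, unicode=False):
        data = urlopen(url).read()            # bytes under Python 3
        return data.decode('utf-8') if unicode else data

    # html = fetch(url, unicode=True)         # str, safe for re.findall()
    # info = json.loads(fetch(url).decode('utf-8'))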
@@ -16,7 +16,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-        data = read_url(url)
+        data = read_url(url, unicode=True)
         urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
         if urls:
             ids.append((key, value))

@@ -28,14 +28,14 @@ def lookup(id):
     logger.debug('lookup %s', id)
     data = {}
     url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-    html = read_url(url)
+    html = read_url(url, unicode=True)
     urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html)
     keys = {
         'pubdate': 'date'
     }
     if urls:
         details = '%s%s' % (base, urls[0])
-        html = read_url(details)
+        html = read_url(details, unicode=True)
         doc = lxml.html.document_fromstring(html)
         for e in doc.xpath("//*[contains(@id, 'biblio')]"):
             key = e.attrib['id'].replace('biblio-', '')

@@ -942,7 +942,7 @@ if __name__ == '__main__':
     for i in range(0, 1000):
         url = 'http://dewey.info/class/%s/about.en.json' % i
         print(url)
-        data = json.loads(read_url(url))
+        data = json.loads(read_url(url).decode('utf-8'))
         for d in list(data.values()):
             if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
                 value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']

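One detail worth noting on the hunk above: json.loads() only started accepting bytes in Python 3.6, so the explicit decode keeps the script working on any Python 3. A standalone illustration (sample data, not from dewey.info):

    import json

    raw = b'{"value": "Computer science"}'   # stands in for a read_url() result
    # json.loads(raw) raises TypeError on Python < 3.6
    data = json.loads(raw.decode('utf-8'))   # works on every Python 3
    assert data['value'] == 'Computer science'
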
@@ -19,7 +19,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = 'http://www.loc.gov/search/?q=%s&all=true' % value
-        html = ox.cache.read_url(url)
+        html = ox.cache.read_url(url).decode('utf-8')
         match = re.search('"http://lccn.loc.gov/(\d+)"', html)
         if match:
             ids.append(('lccn', match.group(1)))

@@ -37,7 +37,7 @@ def lookup(id):
     logger.debug('lookup %s', id)
     ns = '{http://www.loc.gov/mods/v3}'
     url = 'http://lccn.loc.gov/%s/mods' % id
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     mods = ET.fromstring(data)

     info = {

@@ -89,5 +89,5 @@ def lookup(id):
     return r

 def amazon_lookup(asin):
-    html = read_url('http://www.amazon.com/dp/%s' % asin)
+    html = read_url('http://www.amazon.com/dp/%s' % asin).decode('utf-8')
     return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))

@@ -392,7 +392,7 @@ if __name__ == '__main__':
     from ox.cache import read_url

     url = "http://www.loc.gov/marc/countries/countries_code.html"
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     countries = dict([
         [ox.strip_tags(c) for c in r]
         for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)

@@ -78,7 +78,7 @@ def lookup(id, return_all=False):
     logger.debug('lookup %s', id)
     info = api.get('/books/' + id).get('result', {})
     #url = 'https://openlibrary.org/books/%s.json' % id
-    #info = json.loads(read_url(url))
+    #info = json.loads(read_url(url).decode('utf-8'))
     data = format(info, return_all)
     if 'olid' not in data:
         data['olid'] = []

@@ -164,9 +164,9 @@ class API(object):
             data[key] = json.dumps(data[key])
         url = self.base + '/' + action + '?' + urlencode(data)
         if timeout is None:
-            result = json.loads(read_url(url))
+            result = json.loads(read_url(url).decode('utf-8'))
         else:
-            result = json.loads(read_url(url, timeout=timeout))
+            result = json.loads(read_url(url, timeout=timeout).decode('utf-8'))
         if 'status' in result and result['status'] == 'error' or 'error' in result:
             logger.info('FAILED %s %s', action, data)
             logger.info('URL %s', url)

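Both branches of the hunk above repeat the same decode. A possible follow-up refactor (a sketch, not part of this commit) would fetch once and decode once:

    import json
    from ox.cache import read_url

    def api_request(url, timeout=None):
        # fetch with or without an explicit timeout, then decode a single time
        raw = read_url(url) if timeout is None else read_url(url, timeout=timeout)
        return json.loads(raw.decode('utf-8'))
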
@@ -21,7 +21,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
-        html = read_url(url)
+        html = read_url(url).decode('utf-8')
         matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html)
         if matches:
             info = lookup(matches[0])