From b6faab15731a614844eee504f70dc5ad81992d19 Mon Sep 17 00:00:00 2001 From: j Date: Fri, 31 Oct 2014 12:46:14 +0100 Subject: [PATCH] python3 migration --- oml/meta/abebooks.py | 6 +++--- oml/meta/dewey.py | 2 +- oml/meta/loc.py | 4 ++-- oml/meta/lookupbyisbn.py | 2 +- oml/meta/marc_countries.py | 2 +- oml/meta/openlibrary.py | 6 +++--- oml/meta/worldcat.py | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/oml/meta/abebooks.py b/oml/meta/abebooks.py index 8d3420d..ebb1352 100644 --- a/oml/meta/abebooks.py +++ b/oml/meta/abebooks.py @@ -16,7 +16,7 @@ def get_ids(key, value): ids = [] if key == 'isbn': url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id) - data = read_url(url) + data = read_url(url, unicode=True) urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data) if urls: ids.append((key, value)) @@ -28,14 +28,14 @@ def lookup(id): logger.debug('lookup %s', id) data = {} url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id) - html = read_url(url) + html = read_url(url, unicode=True) urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html) keys = { 'pubdate': 'date' } if urls: details = '%s%s' % (base, urls[0]) - html = read_url(details) + html = read_url(details, unicode=True) doc = lxml.html.document_fromstring(html) for e in doc.xpath("//*[contains(@id, 'biblio')]"): key = e.attrib['id'].replace('biblio-', '') diff --git a/oml/meta/dewey.py b/oml/meta/dewey.py index 033d527..fcbac58 100644 --- a/oml/meta/dewey.py +++ b/oml/meta/dewey.py @@ -942,7 +942,7 @@ if __name__ == '__main__': for i in range(0, 1000): url = 'http://dewey.info/class/%s/about.en.json' % i print(url) - data = json.loads(read_url(url)) + data = json.loads(read_url(url).decode('utf-8')) for d in list(data.values()): if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d: value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value'] diff --git a/oml/meta/loc.py b/oml/meta/loc.py index a171903..6bcae70 100644 --- a/oml/meta/loc.py +++ b/oml/meta/loc.py @@ -19,7 +19,7 @@ def get_ids(key, value): ids = [] if key == 'isbn': url = 'http://www.loc.gov/search/?q=%s&all=true' % value - html = ox.cache.read_url(url) + html = ox.cache.read_url(url).decode('utf-8') match = re.search('"http://lccn.loc.gov/(\d+)"', html) if match: ids.append(('lccn', match.group(1))) @@ -37,7 +37,7 @@ def lookup(id): logger.debug('lookup %s', id) ns = '{http://www.loc.gov/mods/v3}' url = 'http://lccn.loc.gov/%s/mods' % id - data = read_url(url) + data = read_url(url).decode('utf-8') mods = ET.fromstring(data) info = { diff --git a/oml/meta/lookupbyisbn.py b/oml/meta/lookupbyisbn.py index da6961c..0b9abdc 100644 --- a/oml/meta/lookupbyisbn.py +++ b/oml/meta/lookupbyisbn.py @@ -89,5 +89,5 @@ def lookup(id): return r def amazon_lookup(asin): - html = read_url('http://www.amazon.com/dp/%s' % asin) + html = read_url('http://www.amazon.com/dp/%s' % asin).decode('utf-8') return list(set(find_isbns(find_re(html, 'Formats.*?.*?class="code">(.*?).*?(.*?)', re.DOTALL).findall(data) diff --git a/oml/meta/openlibrary.py b/oml/meta/openlibrary.py index a64940e..7054a43 100644 --- a/oml/meta/openlibrary.py +++ b/oml/meta/openlibrary.py @@ -78,7 +78,7 @@ def lookup(id, return_all=False): logger.debug('lookup %s', id) info = api.get('/books/' + id).get('result', {}) #url = 'https://openlibrary.org/books/%s.json' % id - #info = json.loads(read_url(url)) + #info = json.loads(read_url(url).decode('utf-8')) data = format(info, return_all) if 'olid' not in data: data['olid'] = [] @@ -164,9 +164,9 @@ class API(object): data[key] = json.dumps(data[key]) url = self.base + '/' + action + '?' + urlencode(data) if timeout is None: - result = json.loads(read_url(url)) + result = json.loads(read_url(url).decode('utf-8')) else: - result = json.loads(read_url(url, timeout=timeout)) + result = json.loads(read_url(url, timeout=timeout).decode('utf-8')) if 'status' in result and result['status'] == 'error' or 'error' in result: logger.info('FAILED %s %s', action, data) logger.info('URL %s', url) diff --git a/oml/meta/worldcat.py b/oml/meta/worldcat.py index 35cda9e..da8108f 100644 --- a/oml/meta/worldcat.py +++ b/oml/meta/worldcat.py @@ -21,7 +21,7 @@ def get_ids(key, value): ids = [] if key == 'isbn': url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value) - html = read_url(url) + html = read_url(url).decode('utf-8') matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html) if matches: info = lookup(matches[0])