python3 migration

commit b6faab1573 (parent 89a24dd1d4)
Author: j
Date:   2014-10-31 12:46:14 +01:00

7 changed files with 12 additions and 12 deletions

@@ -16,7 +16,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-        data = read_url(url)
+        data = read_url(url, unicode=True)
         urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
         if urls:
             ids.append((key, value))
@@ -28,14 +28,14 @@ def lookup(id):
     logger.debug('lookup %s', id)
     data = {}
     url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-    html = read_url(url)
+    html = read_url(url, unicode=True)
     urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html)
     keys = {
         'pubdate': 'date'
     }
     if urls:
         details = '%s%s' % (base, urls[0])
-        html = read_url(details)
+        html = read_url(details, unicode=True)
         doc = lxml.html.document_fromstring(html)
         for e in doc.xpath("//*[contains(@id, 'biblio')]"):
             key = e.attrib['id'].replace('biblio-', '')
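
Note on the change above: ox's read_url() returns the raw response as bytes; passing unicode=True asks it to decode to str before returning, which is what the str regex pattern and lxml.html parsing expect under Python 3. A minimal sketch of the two equivalent spellings used across this commit (the manual variant assumes a UTF-8 response):

    from ox.cache import read_url

    html = read_url(url, unicode=True)    # ox decodes the response to str
    html = read_url(url).decode('utf-8')  # manual decode, assumes UTF-8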

@@ -942,7 +942,7 @@ if __name__ == '__main__':
     for i in range(0, 1000):
         url = 'http://dewey.info/class/%s/about.en.json' % i
         print(url)
-        data = json.loads(read_url(url))
+        data = json.loads(read_url(url).decode('utf-8'))
         for d in list(data.values()):
             if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
                 value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']
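
The decode also matters for json.loads(): on the Python 3 versions current when this commit was made, json.loads() accepts only str (bytes support arrived in Python 3.6), so the bytes returned by read_url() have to be decoded first. A short sketch, assuming a UTF-8 JSON response:

    import json

    raw = read_url(url)                      # bytes under Python 3
    data = json.loads(raw)                   # TypeError before Python 3.6
    data = json.loads(raw.decode('utf-8'))   # works on any version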

@@ -19,7 +19,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = 'http://www.loc.gov/search/?q=%s&all=true' % value
-        html = ox.cache.read_url(url)
+        html = ox.cache.read_url(url).decode('utf-8')
         match = re.search('"http://lccn.loc.gov/(\d+)"', html)
         if match:
             ids.append(('lccn', match.group(1)))
@@ -37,7 +37,7 @@ def lookup(id):
     logger.debug('lookup %s', id)
     ns = '{http://www.loc.gov/mods/v3}'
     url = 'http://lccn.loc.gov/%s/mods' % id
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     mods = ET.fromstring(data)
     info = {
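
The same migration pattern appears here with a different consumer: in Python 3, re refuses to apply a str pattern to bytes data, so get_ids() above would raise a TypeError without the decode. A minimal sketch of the mismatch (pattern shortened from the hunk above):

    import re

    html = ox.cache.read_url(url)           # bytes under Python 3
    re.search(r'lccn.loc.gov/(\d+)', html)  # TypeError: can't use a string pattern on a bytes-like object
    re.search(r'lccn.loc.gov/(\d+)', html.decode('utf-8'))  # OK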

@@ -89,5 +89,5 @@ def lookup(id):
     return r

 def amazon_lookup(asin):
-    html = read_url('http://www.amazon.com/dp/%s' % asin)
+    html = read_url('http://www.amazon.com/dp/%s' % asin).decode('utf-8')
     return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))

@@ -392,7 +392,7 @@ if __name__ == '__main__':
     from ox.cache import read_url
     url = "http://www.loc.gov/marc/countries/countries_code.html"
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     countries = dict([
         [ox.strip_tags(c) for c in r]
         for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)

@@ -78,7 +78,7 @@ def lookup(id, return_all=False):
     logger.debug('lookup %s', id)
     info = api.get('/books/' + id).get('result', {})
     #url = 'https://openlibrary.org/books/%s.json' % id
-    #info = json.loads(read_url(url))
+    #info = json.loads(read_url(url).decode('utf-8'))
     data = format(info, return_all)
     if 'olid' not in data:
         data['olid'] = []
@@ -164,9 +164,9 @@ class API(object):
             data[key] = json.dumps(data[key])
         url = self.base + '/' + action + '?' + urlencode(data)
         if timeout is None:
-            result = json.loads(read_url(url))
+            result = json.loads(read_url(url).decode('utf-8'))
         else:
-            result = json.loads(read_url(url, timeout=timeout))
+            result = json.loads(read_url(url, timeout=timeout).decode('utf-8'))
         if 'status' in result and result['status'] == 'error' or 'error' in result:
             logger.info('FAILED %s %s', action, data)
             logger.info('URL %s', url)
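
Both branches of the timeout check now repeat the same decode-and-parse step; a hypothetical helper (not part of this commit) would keep the Python 3 change in one place:

    import json

    def read_json(url, timeout=None):
        # hypothetical helper, not in the commit: fetch, decode, parse in one place
        data = read_url(url) if timeout is None else read_url(url, timeout=timeout)
        return json.loads(data.decode('utf-8'))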

@@ -21,7 +21,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
-        html = read_url(url)
+        html = read_url(url).decode('utf-8')
         matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html)
         if matches:
             info = lookup(matches[0])