python3 migration

This commit is contained in:
parent 89a24dd1d4
commit b6faab1573

7 changed files with 12 additions and 12 deletions
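All twelve changes follow the same Python 3 pattern: the HTTP helpers (read_url / ox.cache.read_url) return bytes, while re and json want str, so every response is now decoded first, either by asking the helper for text (unicode=True) or by an explicit .decode('utf-8'). A minimal standalone sketch of the pattern (fetch() is a hypothetical stand-in, not the library function):

    from urllib.request import urlopen

    def fetch(url, unicode=False):
        data = urlopen(url).read()            # bytes under Python 3
        return data.decode('utf-8') if unicode else data

    # html = fetch(url, unicode=True)         # str, safe for re.findall()
    # info = json.loads(fetch(url).decode('utf-8'))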
@@ -16,7 +16,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-        data = read_url(url)
+        data = read_url(url, unicode=True)
         urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
         if urls:
             ids.append((key, value))

@@ -28,14 +28,14 @@ def lookup(id):
     logger.debug('lookup %s', id)
     data = {}
     url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-    html = read_url(url)
+    html = read_url(url, unicode=True)
     urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html)
     keys = {
         'pubdate': 'date'
     }
     if urls:
         details = '%s%s' % (base, urls[0])
-        html = read_url(details)
+        html = read_url(details, unicode=True)
         doc = lxml.html.document_fromstring(html)
         for e in doc.xpath("//*[contains(@id, 'biblio')]"):
             key = e.attrib['id'].replace('biblio-', '')

@@ -942,7 +942,7 @@ if __name__ == '__main__':
     for i in range(0, 1000):
         url = 'http://dewey.info/class/%s/about.en.json' % i
         print(url)
-        data = json.loads(read_url(url))
+        data = json.loads(read_url(url).decode('utf-8'))
         for d in list(data.values()):
             if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
                 value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']

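One detail worth noting on the hunk above: json.loads() only started accepting bytes in Python 3.6, so the explicit decode keeps the script working on any Python 3. A standalone illustration (sample data, not from dewey.info):

    import json

    raw = b'{"value": "Computer science"}'   # stands in for a read_url() result
    # json.loads(raw) raises TypeError on Python < 3.6
    data = json.loads(raw.decode('utf-8'))   # works on every Python 3
    assert data['value'] == 'Computer science'
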
@@ -19,7 +19,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = 'http://www.loc.gov/search/?q=%s&all=true' % value
-        html = ox.cache.read_url(url)
+        html = ox.cache.read_url(url).decode('utf-8')
         match = re.search('"http://lccn.loc.gov/(\d+)"', html)
         if match:
             ids.append(('lccn', match.group(1)))

@@ -37,7 +37,7 @@ def lookup(id):
     logger.debug('lookup %s', id)
     ns = '{http://www.loc.gov/mods/v3}'
     url = 'http://lccn.loc.gov/%s/mods' % id
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     mods = ET.fromstring(data)

     info = {

@@ -89,5 +89,5 @@ def lookup(id):
     return r

 def amazon_lookup(asin):
-    html = read_url('http://www.amazon.com/dp/%s' % asin)
+    html = read_url('http://www.amazon.com/dp/%s' % asin).decode('utf-8')
     return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))

@@ -392,7 +392,7 @@ if __name__ == '__main__':
     from ox.cache import read_url

     url = "http://www.loc.gov/marc/countries/countries_code.html"
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     countries = dict([
         [ox.strip_tags(c) for c in r]
         for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)

@@ -78,7 +78,7 @@ def lookup(id, return_all=False):
     logger.debug('lookup %s', id)
     info = api.get('/books/' + id).get('result', {})
     #url = 'https://openlibrary.org/books/%s.json' % id
-    #info = json.loads(read_url(url))
+    #info = json.loads(read_url(url).decode('utf-8'))
     data = format(info, return_all)
     if 'olid' not in data:
         data['olid'] = []

@@ -164,9 +164,9 @@ class API(object):
             data[key] = json.dumps(data[key])
         url = self.base + '/' + action + '?' + urlencode(data)
         if timeout is None:
-            result = json.loads(read_url(url))
+            result = json.loads(read_url(url).decode('utf-8'))
         else:
-            result = json.loads(read_url(url, timeout=timeout))
+            result = json.loads(read_url(url, timeout=timeout).decode('utf-8'))
         if 'status' in result and result['status'] == 'error' or 'error' in result:
             logger.info('FAILED %s %s', action, data)
             logger.info('URL %s', url)

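Both branches of the hunk above repeat the same decode. A possible follow-up refactor (a sketch, not part of this commit) would fetch once and decode once:

    import json
    from ox.cache import read_url

    def api_request(url, timeout=None):
        # fetch with or without an explicit timeout, then decode a single time
        raw = read_url(url) if timeout is None else read_url(url, timeout=timeout)
        return json.loads(raw.decode('utf-8'))
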
@@ -21,7 +21,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
-        html = read_url(url)
+        html = read_url(url).decode('utf-8')
         matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html)
         if matches:
             info = lookup(matches[0])