commit b6faab1573 (parent 89a24dd1d4)

    python3 migration

7 changed files with 12 additions and 12 deletions
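In Python 3, read_url() returns bytes, while the re and json calls downstream expect str. Each hunk below applies one of two equivalent fixes: pass unicode=True where the ox helper can decode internally, or call .decode('utf-8') on the returned bytes. A minimal sketch of both patterns, assuming ox.cache.read_url's unicode keyword behaves as this commit uses it:

    from ox.cache import read_url

    url = 'http://www.example.com/'     # illustrative URL, not from the commit

    html = read_url(url, unicode=True)  # helper decodes; html is str
    raw = read_url(url)                 # default return type is bytes
    html = raw.decode('utf-8')          # explicit decode; html is str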
@@ -16,7 +16,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-        data = read_url(url)
+        data = read_url(url, unicode=True)
         urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(data)
         if urls:
             ids.append((key, value))
@@ -28,14 +28,14 @@ def lookup(id):
     logger.debug('lookup %s', id)
     data = {}
     url = '%s/servlet/SearchResults?isbn=%s&sts=t' % (base, id)
-    html = read_url(url)
+    html = read_url(url, unicode=True)
     urls = re.compile('href="(/servlet/BookDetailsPL[^"]+)"').findall(html)
     keys = {
         'pubdate': 'date'
     }
     if urls:
         details = '%s%s' % (base, urls[0])
-        html = read_url(details)
+        html = read_url(details, unicode=True)
         doc = lxml.html.document_fromstring(html)
         for e in doc.xpath("//*[contains(@id, 'biblio')]"):
             key = e.attrib['id'].replace('biblio-', '')
@@ -942,7 +942,7 @@ if __name__ == '__main__':
     for i in range(0, 1000):
         url = 'http://dewey.info/class/%s/about.en.json' % i
         print(url)
-        data = json.loads(read_url(url))
+        data = json.loads(read_url(url).decode('utf-8'))
         for d in list(data.values()):
             if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
                 value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']
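The explicit decode before json.loads() matters on Python 3 versions older than 3.6, where json.loads() only accepts str; from 3.6 on it also accepts UTF-8 bytes. A small illustration with a made-up payload:

    import json

    payload = b'{"class": "001"}'
    data = json.loads(payload.decode('utf-8'))  # works on every Python 3
    # json.loads(payload)  # TypeError before Python 3.6: bytes not accepted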
@@ -19,7 +19,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = 'http://www.loc.gov/search/?q=%s&all=true' % value
-        html = ox.cache.read_url(url)
+        html = ox.cache.read_url(url).decode('utf-8')
         match = re.search('"http://lccn.loc.gov/(\d+)"', html)
         if match:
             ids.append(('lccn', match.group(1)))
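Without the decode, the re.search() on the next line would fail: in Python 3, pattern and subject must both be str or both bytes, and mixing them raises a TypeError. A short illustration with made-up data:

    import re

    re.search(r'(\d+)', 'lccn/123')    # fine: str pattern, str subject
    re.search(rb'(\d+)', b'lccn/123')  # fine: bytes pattern, bytes subject
    # re.search(r'(\d+)', b'lccn/123')
    # TypeError: cannot use a string pattern on a bytes-like object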
@@ -37,7 +37,7 @@ def lookup(id):
     logger.debug('lookup %s', id)
     ns = '{http://www.loc.gov/mods/v3}'
     url = 'http://lccn.loc.gov/%s/mods' % id
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     mods = ET.fromstring(data)

     info = {
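Strictly speaking, assuming ET here is xml.etree.ElementTree, the parser accepts bytes as well as str, so this decode keeps data consistently str rather than avoiding an error:

    import xml.etree.ElementTree as ET

    mods = ET.fromstring(b'<mods/>')                  # bytes parse fine
    mods = ET.fromstring(b'<mods/>'.decode('utf-8'))  # so does str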
@@ -89,5 +89,5 @@ def lookup(id):
     return r

 def amazon_lookup(asin):
-    html = read_url('http://www.amazon.com/dp/%s' % asin)
+    html = read_url('http://www.amazon.com/dp/%s' % asin).decode('utf-8')
     return list(set(find_isbns(find_re(html, 'Formats</h3>.*?</table'))))
@@ -392,7 +392,7 @@ if __name__ == '__main__':
     from ox.cache import read_url

     url = "http://www.loc.gov/marc/countries/countries_code.html"
-    data = read_url(url)
+    data = read_url(url).decode('utf-8')
     countries = dict([
         [ox.strip_tags(c) for c in r]
         for r in re.compile('<tr>.*?class="code">(.*?)</td>.*?<td>(.*?)</td>', re.DOTALL).findall(data)
@@ -78,7 +78,7 @@ def lookup(id, return_all=False):
     logger.debug('lookup %s', id)
     info = api.get('/books/' + id).get('result', {})
     #url = 'https://openlibrary.org/books/%s.json' % id
-    #info = json.loads(read_url(url))
+    #info = json.loads(read_url(url).decode('utf-8'))
     data = format(info, return_all)
     if 'olid' not in data:
         data['olid'] = []
@@ -164,9 +164,9 @@ class API(object):
             data[key] = json.dumps(data[key])
         url = self.base + '/' + action + '?' + urlencode(data)
         if timeout is None:
-            result = json.loads(read_url(url))
+            result = json.loads(read_url(url).decode('utf-8'))
         else:
-            result = json.loads(read_url(url, timeout=timeout))
+            result = json.loads(read_url(url, timeout=timeout).decode('utf-8'))
         if 'status' in result and result['status'] == 'error' or 'error' in result:
             logger.info('FAILED %s %s', action, data)
             logger.info('URL %s', url)
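Both branches now decode before parsing. An equivalent, slightly drier variant (a sketch of an alternative, not what the commit does) would fetch in the branches and decode once:

    if timeout is None:
        raw = read_url(url)
    else:
        raw = read_url(url, timeout=timeout)
    result = json.loads(raw.decode('utf-8'))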
@@ -21,7 +21,7 @@ def get_ids(key, value):
     ids = []
     if key == 'isbn':
         url = '%s/search?qt=worldcat_org_bks&q=%s' % (base_url, value)
-        html = read_url(url)
+        html = read_url(url).decode('utf-8')
         matches = re.compile('/title.*?oclc/(\d+).*?"').findall(html)
         if matches:
             info = lookup(matches[0])