diff --git a/oml/meta/loc.py b/oml/meta/loc.py index ebac04d..5283e83 100644 --- a/oml/meta/loc.py +++ b/oml/meta/loc.py @@ -19,7 +19,7 @@ def get_ids(key, value): ids = [] if key == 'isbn': url = 'http://www.loc.gov/search/?q=%s&all=true' % value - html = ox.cache.read_url(url).decode('utf-8') + html = ox.cache.read_url(url).decode('utf-8', 'ignore') match = re.search('"http://lccn.loc.gov/(\d+)"', html) if match: ids.append(('lccn', match.group(1))) diff --git a/oml/meta/openlibrary.py b/oml/meta/openlibrary.py index 7054a43..365d183 100644 --- a/oml/meta/openlibrary.py +++ b/oml/meta/openlibrary.py @@ -164,9 +164,15 @@ class API(object): data[key] = json.dumps(data[key]) url = self.base + '/' + action + '?' + urlencode(data) if timeout is None: - result = json.loads(read_url(url).decode('utf-8')) + r = read_url(url).decode('utf-8') + if '504 Gateway Time-out' in r: + r = read_url(url, timeout=-1).decode('utf-8') + result = json.loads(r) else: - result = json.loads(read_url(url, timeout=timeout).decode('utf-8')) + r = read_url(url, timeout=timeout).decode('utf-8') + if '504 Gateway Time-out' in r: + r = read_url(url, timeout=-1).decode('utf-8') + result = json.loads(r) if 'status' in result and result['status'] == 'error' or 'error' in result: logger.info('FAILED %s %s', action, data) logger.info('URL %s', url) diff --git a/oml/metaserver.py b/oml/metaserver.py index 5e35b33..42f0b9f 100644 --- a/oml/metaserver.py +++ b/oml/metaserver.py @@ -50,6 +50,7 @@ def getMetadata(data): key, value = next(iter(data.items())) if key == 'isbn': value = utils.normalize_isbn(value) + logger.debug('getMetadata key=%s value=%s', key, value) response = meta.lookup(key, value) if response: response['primaryid'] = [key, value]