use python logging
This commit is contained in:
parent
d9346e8328
commit
6a8a7b956d
27 changed files with 174 additions and 141 deletions
|
|
@ -2,6 +2,9 @@
|
|||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta')
|
||||
|
||||
import abebooks
|
||||
import loc
|
||||
import lookupbyisbn
|
||||
|
|
@ -41,7 +44,7 @@ def lookup(key, value):
|
|||
if not kv in ids:
|
||||
ids.append(kv)
|
||||
done = False
|
||||
print 'lookup %s=%s =>' % ids[0], ids
|
||||
# Hand the values to logging as arguments: applying % to a single string for
# three placeholders raises TypeError before the message is ever logged.
logger.debug('lookup %s=%s => %s', ids[0][0], ids[0][1], ids)
|
||||
for k, v in ids:
|
||||
for provider, id in providers:
|
||||
if id == k and provider not in provider_data:
|
||||
|
|
@ -50,7 +53,7 @@ def lookup(key, value):
|
|||
provider_data.keys(),
|
||||
key=lambda x: -len(provider_data[x])
|
||||
):
|
||||
print provider, len(provider_data[provider]), provider_data[provider].keys()
|
||||
logger.debug('%s %s %s', provider, len(provider_data[provider]), provider_data[provider].keys())
|
||||
for k_, v_ in provider_data[provider].iteritems():
|
||||
if not k_ in data:
|
||||
data[k_] = v_
|
||||
|
|
|
|||
|
|
@ -1,7 +1,14 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
from ox.cache import read_url
|
||||
import re
|
||||
import lxml.html
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.abebooks')
|
||||
|
||||
def get_ids(key, value):
|
||||
ids = []
|
||||
if key in ('isbn10', 'isbn13'):
|
||||
|
|
@ -12,12 +19,11 @@ def get_ids(key, value):
|
|||
if urls:
|
||||
ids.append((key, value))
|
||||
if ids:
|
||||
print 'abebooks.get_ids', key, value
|
||||
print ids
|
||||
logger.debug('get_ids %s %s => %s', key, value, ids)
|
||||
return ids
|
||||
|
||||
def lookup(id):
|
||||
print 'abebooks.lookup', id
|
||||
logger.debug('lookup %s', id)
|
||||
return {}
|
||||
|
||||
def get_data(id):
|
||||
|
|
|
|||
|
|
@ -7,9 +7,12 @@ import stdnum.isbn
|
|||
|
||||
from .utils import find_isbns
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.duckduckgo')
|
||||
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
|
||||
print 'duckduckgo.find', title, author, publisher, date
|
||||
logger.debug('find %s %s %s %s', title, author, publisher, date)
|
||||
query = title
|
||||
if author:
|
||||
if isinstance(author, list):
|
||||
|
|
|
|||
|
|
@ -7,9 +7,12 @@ import stdnum.isbn
|
|||
|
||||
from .utils import find_isbns
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.google')
|
||||
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
|
||||
print 'google.find', title, author, publisher, date
|
||||
logger.debug('find %s %s %s %s', title, author, publisher, date)
|
||||
query = title
|
||||
if author:
|
||||
if isinstance(author, list):
|
||||
|
|
@ -19,7 +22,7 @@ def find(title, author=None, publisher=None, date=None):
|
|||
isbns = []
|
||||
for r in ox.web.google.find(query):
|
||||
isbns += find_isbns(' '.join(r))
|
||||
print isbns, 'google'
|
||||
# The message needs a placeholder for the argument; without one, logging
# fails to format the record and the ISBN list is never written.
logger.debug('isbns %s', isbns)
|
||||
results = []
|
||||
done = set()
|
||||
for isbn in isbns:
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ import xml.etree.ElementTree as ET
|
|||
from utils import normalize_isbn
|
||||
from marc_countries import COUNTRIES
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.loc')
|
||||
|
||||
def get_ids(key, value):
|
||||
ids = []
|
||||
if key in ['isbn10', 'isbn13']:
|
||||
|
|
@ -19,12 +22,11 @@ def get_ids(key, value):
|
|||
if match:
|
||||
ids.append(('lccn', match.group(1)))
|
||||
if ids:
|
||||
print 'loc.get_ids', key, value
|
||||
print ids
|
||||
logger.debug('get_ids %s,%s => %s', key, value, ids)
|
||||
return ids
|
||||
|
||||
def lookup(id):
|
||||
print 'loc.lookup', id
|
||||
logger.debug('lookup %s', id)
|
||||
ns = '{http://www.loc.gov/mods/v3}'
|
||||
url = 'http://lccn.loc.gov/%s/mods' % id
|
||||
data = read_url(url)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ from ox import find_re, strip_tags, decode_html
|
|||
import re
|
||||
import stdnum.isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.lookupbyisbn')
|
||||
|
||||
base = 'http://www.lookupbyisbn.com'
|
||||
|
||||
def get_ids(key, value):
|
||||
|
|
@ -20,12 +23,11 @@ def get_ids(key, value):
|
|||
if stdnum.isbn.is_valid(value):
|
||||
ids.append(('isbn10', value))
|
||||
if ids:
|
||||
print 'lookupbyisbn.get_ids', key, value
|
||||
print ids
|
||||
logger.debug('get_ids %s, %s => %s', key, value, ids)
|
||||
return ids
|
||||
|
||||
def lookup(id):
|
||||
print 'lookupbyisbn.lookup', id
|
||||
logger.debug('lookup %s', id)
|
||||
r = {
|
||||
'asin': id
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ import json
|
|||
from marc_countries import COUNTRIES
|
||||
from utils import normalize_isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.openlibrary')
|
||||
|
||||
KEYS = {
|
||||
'authors': 'author',
|
||||
'covers': 'cover',
|
||||
|
|
@ -31,7 +34,7 @@ def find(*args, **kargs):
|
|||
args = [a.replace(':', ' ') for a in args]
|
||||
for k in ('date', 'publisher'):
|
||||
if k in kargs:
|
||||
print 'ignoring %s on openlibrary' % k, kargs[k]
|
||||
logger.debug('ignoring %s on openlibrary %s', k, kargs[k])
|
||||
del kargs[k]
|
||||
for k, v in kargs.iteritems():
|
||||
key = KEYS.keys()[KEYS.values().index(k)]
|
||||
|
|
@ -43,7 +46,7 @@ def find(*args, **kargs):
|
|||
args += v
|
||||
query = ' '.join(args)
|
||||
query = query.strip()
|
||||
print 'openlibrary.find', query
|
||||
logger.debug('find %s', query)
|
||||
r = api.search(query)
|
||||
results = []
|
||||
ids = [b for b in r.get('result', []) if b.startswith('/books')]
|
||||
|
|
@ -66,7 +69,7 @@ def get_ids(key, value):
|
|||
if (id, v) not in ids:
|
||||
ids.append((id, v))
|
||||
elif key in ('isbn10', 'isbn13', 'oclc', 'lccn'):
|
||||
print 'openlibraryid.get_ids', key, value
|
||||
logger.debug('openlibraryid.get_ids %s %s', key, value)
|
||||
r = api.things({'type': '/type/edition', key.replace('isbn', 'isbn_'): value})
|
||||
for b in r.get('result', []):
|
||||
if b.startswith('/books'):
|
||||
|
|
@ -75,8 +78,7 @@ def get_ids(key, value):
|
|||
if kv not in ids:
|
||||
ids.append(kv)
|
||||
if ids:
|
||||
print 'openlibraryid.get_ids', key, value
|
||||
print ids
|
||||
logger.debug('get_ids %s %s => %s', key, value, ids)
|
||||
return ids
|
||||
|
||||
def lookup(id, return_all=False):
|
||||
|
|
@ -86,7 +88,7 @@ def lookup(id, return_all=False):
|
|||
#info = json.loads(read_url(url))
|
||||
data = format(info, return_all)
|
||||
data['olid'] = id
|
||||
print 'openlibrary.lookup', id, data.keys()
|
||||
logger.debug('lookup %s => %s', id, data.keys())
|
||||
return data
|
||||
|
||||
def format(info, return_all=False):
|
||||
|
|
@ -133,8 +135,8 @@ class API(object):
|
|||
url = self.base + '/' + action + '?' + urlencode(data)
|
||||
result = json.loads(read_url(url))
|
||||
if 'status' in result and result['status'] == 'error' or 'error' in result:
|
||||
print 'FAILED', action, data
|
||||
print 'URL', url
|
||||
logger.info('FAILED %s %s', action, data)
|
||||
logger.info('URL %s', url)
|
||||
return result
|
||||
|
||||
def get(self, key):
|
||||
|
|
@ -152,7 +154,7 @@ class API(object):
|
|||
}
|
||||
data = self._request('search', {'q': query})
|
||||
if 'status' in data and data['status'] == 'error':
|
||||
print 'FAILED', query
|
||||
logger.info('FAILED %s', query)
|
||||
return data
|
||||
|
||||
def things(self, query):
|
||||
|
|
|
|||
|
|
@ -1,32 +0,0 @@
|
|||
import json
|
||||
from ox.cache import read_url
|
||||
import ox.web.lookupbyisbn
|
||||
|
||||
from utils import normalize_isbn
|
||||
|
||||
import openlibrary as ol
|
||||
|
||||
def add_lookupbyisbn(item):
|
||||
isbn = item.meta.get('isbn10', item.meta.get('isbn13'))
|
||||
if isbn:
|
||||
more = ox.web.lookupbyisbn.get_data(isbn)
|
||||
if more:
|
||||
for key in more:
|
||||
if more[key]:
|
||||
value = more[key]
|
||||
if isinstance(value, basestring):
|
||||
value = ox.strip_tags(ox.decode_html(value))
|
||||
elif isinstance(value, list):
|
||||
value = [ox.strip_tags(ox.decode_html(v)) for v in value]
|
||||
item.meta[key] = value
|
||||
|
||||
if 'author' in item.meta and isinstance(item.meta['author'], basestring):
|
||||
item.meta['author'] = [item.meta['author']]
|
||||
if 'isbn' in item.meta:
|
||||
del item.meta['isbn']
|
||||
|
||||
def update_ol(item):
|
||||
info = ol.info(item.meta['olid'])
|
||||
for key in info:
|
||||
item.meta[key] = info[key]
|
||||
|
||||
|
|
@ -8,6 +8,9 @@ import re
|
|||
from utils import normalize_isbn
|
||||
import stdnum.isbn
|
||||
|
||||
import logging
|
||||
logger = logging.getLogger('meta.worldcat')
|
||||
|
||||
base_url = 'http://www.worldcat.org'
|
||||
|
||||
def get_ids(key, value):
|
||||
|
|
@ -28,8 +31,8 @@ def get_ids(key, value):
|
|||
if k in info:
|
||||
ids.append((k, info[k]))
|
||||
if ids:
|
||||
print 'worldcat.get_ids', key, value
|
||||
print ids
|
||||
logger.debug('get_ids %s %s', key, value)
|
||||
logger.debug('%s', ids)
|
||||
return ids
|
||||
|
||||
def lookup(id):
|
||||
|
|
@ -58,8 +61,7 @@ def lookup(id):
|
|||
data['isbn%d'%len(isbn)] = isbn
|
||||
if 'author' in data:
|
||||
data['author'] = [data['author']]
|
||||
print 'worldcat.lookup', id
|
||||
print data.keys()
|
||||
logger.debug('lookup %s => %s', id, data.keys())
|
||||
return data
|
||||
|
||||
info = lookup
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue