use python logging

This commit is contained in:
j 2014-05-17 16:26:59 +02:00
commit 6a8a7b956d
27 changed files with 174 additions and 141 deletions

View file

@@ -2,6 +2,9 @@
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import logging
logger = logging.getLogger('meta')
import abebooks
import loc
import lookupbyisbn
@@ -41,7 +44,7 @@ def lookup(key, value):
if not kv in ids:
ids.append(kv)
done = False
print 'lookup %s=%s =>' % ids[0], ids
logger.debug('lookup %s=%s => %s', ids[0][0], ids[0][1], ids)
for k, v in ids:
for provider, id in providers:
if id == k and provider not in provider_data:
@@ -50,7 +53,7 @@ def lookup(key, value):
provider_data.keys(),
key=lambda x: -len(provider_data[x])
):
print provider, len(provider_data[provider]), provider_data[provider].keys()
logger.debug('%s %s %s', provider, len(provider_data[provider]), provider_data[provider].keys())
for k_, v_ in provider_data[provider].iteritems():
if not k_ in data:
data[k_] = v_

View file

@@ -1,7 +1,14 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
from ox.cache import read_url
import re
import lxml.html
import logging
logger = logging.getLogger('meta.abebooks')
def get_ids(key, value):
ids = []
if key in ('isbn10', 'isbn13'):
@@ -12,12 +19,11 @@ def get_ids(key, value):
if urls:
ids.append((key, value))
if ids:
print 'abebooks.get_ids', key, value
print ids
logger.debug('get_ids %s %s => %s', key, value, ids)
return ids
def lookup(id):
print 'abebooks.lookup', id
logger.debug('lookup %s', id)
return {}
def get_data(id):

View file

@@ -7,9 +7,12 @@ import stdnum.isbn
from .utils import find_isbns
import logging
logger = logging.getLogger('meta.duckduckgo')
def find(title, author=None, publisher=None, date=None):
print 'duckduckgo.find', title, author, publisher, date
logger.debug('find %s %s %s %s', title, author, publisher, date)
query = title
if author:
if isinstance(author, list):

View file

@@ -7,9 +7,12 @@ import stdnum.isbn
from .utils import find_isbns
import logging
logger = logging.getLogger('meta.google')
def find(title, author=None, publisher=None, date=None):
print 'google.find', title, author, publisher, date
logger.debug('find %s %s %s %s', title, author, publisher, date)
query = title
if author:
if isinstance(author, list):
@@ -19,7 +22,7 @@ def find(title, author=None, publisher=None, date=None):
isbns = []
for r in ox.web.google.find(query):
isbns += find_isbns(' '.join(r))
print isbns, 'google'
logger.debug('isbns %s', isbns)
results = []
done = set()
for isbn in isbns:

View file

@@ -10,6 +10,9 @@ import xml.etree.ElementTree as ET
from utils import normalize_isbn
from marc_countries import COUNTRIES
import logging
logger = logging.getLogger('meta.loc')
def get_ids(key, value):
ids = []
if key in ['isbn10', 'isbn13']:
@@ -19,12 +22,11 @@ def get_ids(key, value):
if match:
ids.append(('lccn', match.group(1)))
if ids:
print 'loc.get_ids', key, value
print ids
logger.debug('get_ids %s,%s => %s', key, value, ids)
return ids
def lookup(id):
print 'loc.lookup', id
logger.debug('lookup %s', id)
ns = '{http://www.loc.gov/mods/v3}'
url = 'http://lccn.loc.gov/%s/mods' % id
data = read_url(url)

View file

@@ -3,6 +3,9 @@ from ox import find_re, strip_tags, decode_html
import re
import stdnum.isbn
import logging
logger = logging.getLogger('meta.lookupbyisbn')
base = 'http://www.lookupbyisbn.com'
def get_ids(key, value):
@@ -20,12 +23,11 @@ def get_ids(key, value):
if stdnum.isbn.is_valid(value):
ids.append(('isbn10', value))
if ids:
print 'lookupbyisbn.get_ids', key, value
print ids
logger.debug('get_ids %s, %s => %s', key, value, ids)
return ids
def lookup(id):
print 'lookupbyisbn.lookup', id
logger.debug('lookup %s', id)
r = {
'asin': id
}

View file

@@ -9,6 +9,9 @@ import json
from marc_countries import COUNTRIES
from utils import normalize_isbn
import logging
logger = logging.getLogger('meta.openlibrary')
KEYS = {
'authors': 'author',
'covers': 'cover',
@@ -31,7 +34,7 @@ def find(*args, **kargs):
args = [a.replace(':', ' ') for a in args]
for k in ('date', 'publisher'):
if k in kargs:
print 'ignoring %s on openlibrary' % k, kargs[k]
logger.debug('ignoring %s on openlibrary %s', k, kargs[k])
del kargs[k]
for k, v in kargs.iteritems():
key = KEYS.keys()[KEYS.values().index(k)]
@@ -43,7 +46,7 @@ def find(*args, **kargs):
args += v
query = ' '.join(args)
query = query.strip()
print 'openlibrary.find', query
logger.debug('find %s', query)
r = api.search(query)
results = []
ids = [b for b in r.get('result', []) if b.startswith('/books')]
@@ -66,7 +69,7 @@ def get_ids(key, value):
if (id, v) not in ids:
ids.append((id, v))
elif key in ('isbn10', 'isbn13', 'oclc', 'lccn'):
print 'openlibraryid.get_ids', key, value
logger.debug('openlibraryid.get_ids %s %s', key, value)
r = api.things({'type': '/type/edition', key.replace('isbn', 'isbn_'): value})
for b in r.get('result', []):
if b.startswith('/books'):
@@ -75,8 +78,7 @@ def get_ids(key, value):
if kv not in ids:
ids.append(kv)
if ids:
print 'openlibraryid.get_ids', key, value
print ids
logger.debug('get_ids %s %s => %s', key, value, ids)
return ids
def lookup(id, return_all=False):
@@ -86,7 +88,7 @@ def lookup(id, return_all=False):
#info = json.loads(read_url(url))
data = format(info, return_all)
data['olid'] = id
print 'openlibrary.lookup', id, data.keys()
logger.debug('lookup %s => %s', id, data.keys())
return data
def format(info, return_all=False):
@@ -133,8 +135,8 @@ class API(object):
url = self.base + '/' + action + '?' + urlencode(data)
result = json.loads(read_url(url))
if 'status' in result and result['status'] == 'error' or 'error' in result:
print 'FAILED', action, data
print 'URL', url
logger.info('FAILED %s %s', action, data)
logger.info('URL %s', url)
return result
def get(self, key):
@@ -152,7 +154,7 @@ class API(object):
}
data = self._request('search', {'q': query})
if 'status' in data and data['status'] == 'error':
print 'FAILED', query
logger.info('FAILED %s', query)
return data
def things(self, query):

View file

@@ -1,32 +0,0 @@
import json
from ox.cache import read_url
import ox.web.lookupbyisbn
from utils import normalize_isbn
import openlibrary as ol
def add_lookupbyisbn(item):
isbn = item.meta.get('isbn10', item.meta.get('isbn13'))
if isbn:
more = ox.web.lookupbyisbn.get_data(isbn)
if more:
for key in more:
if more[key]:
value = more[key]
if isinstance(value, basestring):
value = ox.strip_tags(ox.decode_html(value))
elif isinstance(value, list):
value = [ox.strip_tags(ox.decode_html(v)) for v in value]
item.meta[key] = value
if 'author' in item.meta and isinstance(item.meta['author'], basestring):
item.meta['author'] = [item.meta['author']]
if 'isbn' in item.meta:
del item.meta['isbn']
def update_ol(item):
info = ol.info(item.meta['olid'])
for key in info:
item.meta[key] = info[key]

View file

@@ -8,6 +8,9 @@ import re
from utils import normalize_isbn
import stdnum.isbn
import logging
logger = logging.getLogger('meta.worldcat')
base_url = 'http://www.worldcat.org'
def get_ids(key, value):
@@ -28,8 +31,8 @@ def get_ids(key, value):
if k in info:
ids.append((k, info[k]))
if ids:
print 'worldcat.get_ids', key, value
print ids
logger.debug('get_ids %s %s', key, value)
logger.debug('%s', ids)
return ids
def lookup(id):
@@ -58,8 +61,7 @@ def lookup(id):
data['isbn%d'%len(isbn)] = isbn
if 'author' in data:
data['author'] = [data['author']]
print 'worldcat.lookup', id
print data.keys()
logger.debug('lookup %s => %s', id, data.keys())
return data
info = lookup