port to python3

parent 14f426afd4
commit 8e27b9f76e

51 changed files with 272 additions and 248 deletions
@@ -1,17 +1,17 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import stdnum.isbn
 import ox

-import abebooks
-import loc
-import lookupbyisbn
-import openlibrary
-import worldcat
-import google
-import duckduckgo
+from . import abebooks
+from . import loc
+from . import lookupbyisbn
+from . import openlibrary
+from . import worldcat
+from . import google
+from . import duckduckgo

 import logging
 logger = logging.getLogger('meta')
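Python 3 removed implicit relative imports, so sibling modules inside the meta package have to be imported with an explicit `from . import ...`; a bare `import abebooks` would now look for a top-level module of that name and fail. A minimal sketch of the difference, assuming a package layout like the one implied by the logger names (meta/abebooks.py and so on, which is an assumption, not stated in the diff):

    # hypothetical meta/__init__.py, for illustration only
    # Python 2 resolved "import abebooks" relative to the current package;
    # Python 3 treats it as an absolute import and raises ImportError.
    from . import abebooks   # explicit relative import, works on Python 3
    # import abebooks        # Python 2 only (implicit relative import)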
@@ -51,22 +51,22 @@ def lookup(key, value):
             ids.append(kv)
     done = False
     logger.debug('FIXME: sort ids')
-    ids.sort(key=lambda i: ox.sort_string(u''.join(i)))
+    ids.sort(key=lambda i: ox.sort_string(''.join(i)))
     logger.debug('IDS %s', ids)
     for k, v in ids:
         for provider, id in providers:
             if id == k:
                 if provider not in provider_data:
                     provider_data[provider] = {}
-                for k_, v_ in globals()[provider].lookup(v).iteritems():
+                for k_, v_ in globals()[provider].lookup(v).items():
                     if k_ not in provider_data[provider]:
                         provider_data[provider][k_] = v_
     for provider in sorted(
-        provider_data.keys(),
+        list(provider_data.keys()),
         key=lambda x: -len(provider_data[x])
     ):
-        logger.debug('%s %s %s', provider, len(provider_data[provider]), provider_data[provider].keys())
-        for k_, v_ in provider_data[provider].iteritems():
+        logger.debug('%s %s %s', provider, len(provider_data[provider]), list(provider_data[provider].keys()))
+        for k_, v_ in provider_data[provider].items():
             if not k_ in data:
                 data[k_] = v_
     for k, v in ids:
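The recurring change in this hunk is the Python 3 dict protocol: iteritems() and iterkeys() are gone, and items()/keys() return view objects instead of lists. The views can be iterated directly; wrapping keys() in list() only matters when an actual list is needed or when the dict is mutated during iteration. A small standalone sketch of the idiom, with made-up example data rather than real provider results:

    provider_data = {'loc': {'title': 'x'}, 'openlibrary': {'title': 'x', 'isbn': ['123']}}

    # Python 3: items() returns a view; no iteritems() needed.
    for provider, values in provider_data.items():
        print(provider, len(values))

    # sorted() accepts any iterable, so list() around keys() is optional here;
    # it mainly keeps the code identical on Python 2 and 3.
    for provider in sorted(provider_data, key=lambda x: -len(provider_data[x])):
        print(provider)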
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import re
@@ -2,10 +2,10 @@
 # vi:si:et:sw=4:sts=4:ts=4

 def get_classification(id):
-    name = u'%s' % id
+    name = '%s' % id
     base = ''.join([s for s in id.split('/')[0].split('.')[0] if s.isdigit()])
     if base in DEWEY:
-        name = u'%s %s' % (name, DEWEY[base].decode('utf-8'))
+        name = '%s %s' % (name, DEWEY[base].decode('utf-8'))
     return name

 DEWEY = {
@@ -941,9 +941,9 @@ if __name__ == '__main__':
     dewey = {}
     for i in range(0, 1000):
         url = 'http://dewey.info/class/%s/about.en.json' % i
-        print url
+        print(url)
         data = json.loads(read_url(url))
-        for d in data.values():
+        for d in list(data.values()):
             if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
                 value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']
                 dewey[str(i)] = value
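Two mechanical Python 3 changes show up in dewey.py: print becomes a function, and the u'' prefix on text literals is redundant because str is already Unicode. One thing worth watching, as an observation rather than something this diff addresses: DEWEY[base].decode('utf-8') only works if the stored values are bytes; calling .decode() on a Python 3 str raises AttributeError. A defensive sketch, with the helper name invented for illustration:

    def to_text(value, encoding='utf-8'):
        # Hypothetical helper: decode bytes, pass str through unchanged,
        # so leftover .decode() assumptions cannot blow up on Python 3.
        return value.decode(encoding) if isinstance(value, bytes) else value

    print(to_text(b'Arts & recreation'))   # bytes -> 'Arts & recreation'
    print(to_text('Arts & recreation'))    # str passes through unchanged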
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import ox.web.duckduckgo
 import stdnum.isbn
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import ox.web.google
 import stdnum.isbn
@@ -1,15 +1,15 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 from ox.cache import read_url
 import ox
 import re
 import xml.etree.ElementTree as ET

-from dewey import get_classification
-from marc_countries import COUNTRIES
-from utils import normalize_isbn
+from .dewey import get_classification
+from .marc_countries import COUNTRIES
+from .utils import normalize_isbn

 import logging
 logger = logging.getLogger('meta.loc')
@@ -86,7 +86,7 @@ def lookup(id):
     toc = mods.findall(ns + 'tableOfContents')
     if toc:
         info['description'] = toc[0].text.strip()
-    for key in info.keys():
+    for key in list(info.keys()):
         if not info[key]:
             del info[key]
     return info
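Here the list() wrapper is not cosmetic: the loop deletes entries from info while iterating, and on Python 3 iterating a live keys() view while the dict shrinks raises RuntimeError ("dictionary changed size during iteration"). Snapshotting the keys first, or rebuilding the dict, avoids that. A standalone sketch with made-up data:

    info = {'title': 'A Book', 'description': '', 'isbn': []}

    # Snapshot the keys before mutating the dict (what the port does).
    for key in list(info.keys()):
        if not info[key]:
            del info[key]

    # An often clearer alternative: rebuild with only the truthy values.
    info = {k: v for k, v in info.items() if v}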
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import re
@@ -8,7 +8,7 @@ from ox.cache import read_url
 from ox import find_re, strip_tags, decode_html
 import stdnum.isbn

-from utils import find_isbns
+from .utils import find_isbns

 import logging
 logger = logging.getLogger('meta.lookupbyisbn')
@@ -78,13 +78,13 @@ def lookup(id):
     r['description'] = decode_html(strip_tags(desc))
     r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
     for key in r:
-        if isinstance(r[key], basestring):
+        if isinstance(r[key], str):
             r[key] = decode_html(strip_tags(r[key])).strip()
-    if 'author' in r and isinstance(r['author'], basestring) and r['author']:
+    if 'author' in r and isinstance(r['author'], str) and r['author']:
         r['author'] = [r['author']]
     else:
         r['author'] = []
-    if r['description'].lower() == u'Description of this item is not available at this time.'.lower():
+    if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
         r['description'] = ''
     return r

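basestring no longer exists in Python 3; the replacement used throughout this commit is a plain isinstance(value, str) check, since decoded text is always str. If undecoded byte strings can still reach the same code path, the check has to include bytes explicitly. A sketch of both variants, with illustrative names:

    def is_text(value):
        # Python 3 equivalent of the old isinstance(value, basestring) check.
        return isinstance(value, str)

    def is_stringlike(value):
        # Broader check if raw byte strings may still appear.
        return isinstance(value, (str, bytes))

    assert is_text('author name') and not is_text(['author name'])
    assert is_stringlike(b'raw bytes')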
@@ -1,16 +1,16 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 from datetime import datetime
-from urllib import urlencode
+from urllib.parse import urlencode
 import json

 from ox.cache import read_url

-from dewey import get_classification
-from marc_countries import COUNTRIES
-from utils import normalize_isbn
+from .dewey import get_classification
+from .marc_countries import COUNTRIES
+from .utils import normalize_isbn

 import logging
 logger = logging.getLogger('meta.openlibrary')
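The urllib module was reorganized in Python 3: urlencode (along with quote, urlparse and friends) now lives in urllib.parse, while the request side moved to urllib.request. A minimal standalone use of urlencode as imported above; the endpoint and parameters are placeholders, not the API calls this module actually makes:

    from urllib.parse import urlencode

    # Build a query string for a GET request (values are just examples).
    params = {'query': 'isbn:9780140449136', 'limit': 10}
    url = 'https://example.org/api/things?' + urlencode(params)
    print(url)  # https://example.org/api/things?query=isbn%3A9780140449136&limit=10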
@@ -41,7 +41,7 @@ def find(query):
     results = []
     ids = [b for b in r.get('result', []) if b.startswith('/books')]
     books = api.get_many(ids).get('result', [])
-    for olid, value in books.iteritems():
+    for olid, value in books.items():
         olid = olid.split('/')[-1]
         book = format(value)
         book['olid'] = [olid]
@@ -84,7 +84,7 @@ def lookup(id, return_all=False):
         data['olid'] = []
     if id not in data['olid']:
         data['olid'] = [id]
-    logger.debug('lookup %s => %s', id, data.keys())
+    logger.debug('lookup %s => %s', id, list(data.keys()))
     return data

 def get_type(obj):
@@ -129,7 +129,7 @@ def format(info, return_all=False):
         elif key in ('isbn_10', 'isbn_13'):
             if not isinstance(value, list):
                 value = [value]
-            value = map(normalize_isbn, value)
+            value = list(map(normalize_isbn, value))
             if KEYS[key] in data:
                 value = data[KEYS[key]] + value
         elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'):
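On Python 3, map() returns a lazy iterator rather than a list, so the result has to be materialized before it is concatenated with another list on the following line or iterated more than once. A list comprehension is the usual alternative. A sketch with a stand-in normalizer mirroring the one in the package's utils module shown further down:

    def normalize_isbn(value):
        # Same idea as the project's helper: keep digits and a trailing X.
        return ''.join(c for c in value if c.isdigit() or c == 'X')

    raw = ['978-0-14-044913-6']
    value = list(map(normalize_isbn, raw))      # materialize the iterator
    value = [normalize_isbn(v) for v in raw]    # equivalent comprehension
    print(value + ['9780141439846'])            # list + list now works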
@@ -149,7 +149,7 @@ def format(info, return_all=False):
 def resolve_names(objects, key='name'):
     r = []
     data = api.get_many([k['key'] for k in objects]).get('result', {})
-    for k, value in data.iteritems():
+    for k, value in data.items():
         if 'location' in value and value.get('type', {}).get('key') == '/type/redirect':
             value = api.get(value['location']).get('result', {})
         r.append(value[key])
@@ -160,7 +160,7 @@ class API(object):

     def _request(self, action, data, timeout=None):
         for key in data:
-            if not isinstance(data[key], basestring):
+            if not isinstance(data[key], str):
                 data[key] = json.dumps(data[key])
         url = self.base + '/' + action + '?' + urlencode(data)
         if timeout is None:
@@ -181,7 +181,7 @@ class API(object):
         return data

     def search(self, query):
-        if isinstance(query, basestring):
+        if isinstance(query, str):
             query = {
                 'query': query
             }
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import re
 import stdnum.isbn
@@ -10,6 +10,8 @@ def normalize_isbn(value):
     return ''.join([s for s in value if s.isdigit() or s == 'X'])

 def find_isbns(text):
+    if isinstance(text, bytes):
+        text = text.decode()
     matches = re.compile('\d[\d\-X\ ]+').findall(text)
     matches = [normalize_isbn(value) for value in matches]
     return [isbn for isbn in matches if stdnum.isbn.is_valid(isbn)
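This is one of the few places where the port adds logic instead of renaming calls: cached page content may arrive as bytes, and Python 3 refuses to run a text regex over bytes, so the input is decoded first (bytes.decode() with no arguments assumes UTF-8). A standalone sketch of the same guard; the sample text is made up, and the raw-string prefix on the pattern is an extra nicety that avoids an invalid-escape warning on newer Pythons:

    import re

    def find_isbn_candidates(text):
        # Accept bytes or str, like the ported find_isbns().
        if isinstance(text, bytes):
            text = text.decode()   # assumes UTF-8 input
        return re.findall(r'\d[\d\-X ]+', text)

    print(find_isbn_candidates(b'ISBN 978-0-14-044913-6 (paperback)'))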
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # vi:si:et:sw=4:sts=4:ts=4
 from __future__ import division

 import re
 import hashlib
@@ -99,7 +99,7 @@ def lookup(id):
     if m:
         data['date'] = m[0]

-    logger.debug('lookup %s => %s', id, data.keys())
+    logger.debug('lookup %s => %s', id, list(data.keys()))
     return data

 info = lookup