port to python3

This commit is contained in:
j 2014-09-03 00:32:44 +02:00
commit 8e27b9f76e
51 changed files with 272 additions and 248 deletions

View file

@@ -1,17 +1,17 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import stdnum.isbn
import ox
import abebooks
import loc
import lookupbyisbn
import openlibrary
import worldcat
import google
import duckduckgo
from . import abebooks
from . import loc
from . import lookupbyisbn
from . import openlibrary
from . import worldcat
from . import google
from . import duckduckgo
import logging
logger = logging.getLogger('meta')
@@ -51,22 +51,22 @@ def lookup(key, value):
ids.append(kv)
done = False
logger.debug('FIXME: sort ids')
ids.sort(key=lambda i: ox.sort_string(u''.join(i)))
ids.sort(key=lambda i: ox.sort_string(''.join(i)))
logger.debug('IDS %s', ids)
for k, v in ids:
for provider, id in providers:
if id == k:
if provider not in provider_data:
provider_data[provider] = {}
for k_, v_ in globals()[provider].lookup(v).iteritems():
for k_, v_ in globals()[provider].lookup(v).items():
if k_ not in provider_data[provider]:
provider_data[provider][k_] = v_
for provider in sorted(
provider_data.keys(),
list(provider_data.keys()),
key=lambda x: -len(provider_data[x])
):
logger.debug('%s %s %s', provider, len(provider_data[provider]), provider_data[provider].keys())
for k_, v_ in provider_data[provider].iteritems():
logger.debug('%s %s %s', provider, len(provider_data[provider]), list(provider_data[provider].keys()))
for k_, v_ in provider_data[provider].items():
if not k_ in data:
data[k_] = v_
for k, v in ids:

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import re

View file

@@ -2,10 +2,10 @@
# vi:si:et:sw=4:sts=4:ts=4
def get_classification(id):
name = u'%s' % id
name = '%s' % id
base = ''.join([s for s in id.split('/')[0].split('.')[0] if s.isdigit()])
if base in DEWEY:
name = u'%s %s' % (name, DEWEY[base].decode('utf-8'))
name = '%s %s' % (name, DEWEY[base].decode('utf-8'))
return name
DEWEY = {
@@ -941,9 +941,9 @@ if __name__ == '__main__':
dewey = {}
for i in range(0, 1000):
url = 'http://dewey.info/class/%s/about.en.json' % i
print url
print(url)
data = json.loads(read_url(url))
for d in data.values():
for d in list(data.values()):
if 'http://www.w3.org/2004/02/skos/core#prefLabel' in d:
value = d['http://www.w3.org/2004/02/skos/core#prefLabel'][0]['value']
dewey[str(i)] = value

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import ox.web.duckduckgo
import stdnum.isbn

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import ox.web.google
import stdnum.isbn

View file

@@ -1,15 +1,15 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
from ox.cache import read_url
import ox
import re
import xml.etree.ElementTree as ET
from dewey import get_classification
from marc_countries import COUNTRIES
from utils import normalize_isbn
from .dewey import get_classification
from .marc_countries import COUNTRIES
from .utils import normalize_isbn
import logging
logger = logging.getLogger('meta.loc')
@@ -86,7 +86,7 @@ def lookup(id):
toc = mods.findall(ns + 'tableOfContents')
if toc:
info['description'] = toc[0].text.strip()
for key in info.keys():
for key in list(info.keys()):
if not info[key]:
del info[key]
return info

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import re
@@ -8,7 +8,7 @@ from ox.cache import read_url
from ox import find_re, strip_tags, decode_html
import stdnum.isbn
from utils import find_isbns
from .utils import find_isbns
import logging
logger = logging.getLogger('meta.lookupbyisbn')
@@ -78,13 +78,13 @@ def lookup(id):
r['description'] = decode_html(strip_tags(desc))
r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
for key in r:
if isinstance(r[key], basestring):
if isinstance(r[key], str):
r[key] = decode_html(strip_tags(r[key])).strip()
if 'author' in r and isinstance(r['author'], basestring) and r['author']:
if 'author' in r and isinstance(r['author'], str) and r['author']:
r['author'] = [r['author']]
else:
r['author'] = []
if r['description'].lower() == u'Description of this item is not available at this time.'.lower():
if r['description'].lower() == 'Description of this item is not available at this time.'.lower():
r['description'] = ''
return r

View file

@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
from datetime import datetime
from urllib import urlencode
from urllib.parse import urlencode
import json
from ox.cache import read_url
from dewey import get_classification
from marc_countries import COUNTRIES
from utils import normalize_isbn
from .dewey import get_classification
from .marc_countries import COUNTRIES
from .utils import normalize_isbn
import logging
logger = logging.getLogger('meta.openlibrary')
@@ -41,7 +41,7 @@ def find(query):
results = []
ids = [b for b in r.get('result', []) if b.startswith('/books')]
books = api.get_many(ids).get('result', [])
for olid, value in books.iteritems():
for olid, value in books.items():
olid = olid.split('/')[-1]
book = format(value)
book['olid'] = [olid]
@@ -84,7 +84,7 @@ def lookup(id, return_all=False):
data['olid'] = []
if id not in data['olid']:
data['olid'] = [id]
logger.debug('lookup %s => %s', id, data.keys())
logger.debug('lookup %s => %s', id, list(data.keys()))
return data
def get_type(obj):
@@ -129,7 +129,7 @@ def format(info, return_all=False):
elif key in ('isbn_10', 'isbn_13'):
if not isinstance(value, list):
value = [value]
value = map(normalize_isbn, value)
value = list(map(normalize_isbn, value))
if KEYS[key] in data:
value = data[KEYS[key]] + value
elif isinstance(value, list) and key not in ('publish_places', 'lccn', 'oclc_numbers'):
@ -149,7 +149,7 @@ def format(info, return_all=False):
def resolve_names(objects, key='name'):
r = []
data = api.get_many([k['key'] for k in objects]).get('result', {})
for k, value in data.iteritems():
for k, value in data.items():
if 'location' in value and value.get('type', {}).get('key') == '/type/redirect':
value = api.get(value['location']).get('result', {})
r.append(value[key])
@@ -160,7 +160,7 @@ class API(object):
def _request(self, action, data, timeout=None):
for key in data:
if not isinstance(data[key], basestring):
if not isinstance(data[key], str):
data[key] = json.dumps(data[key])
url = self.base + '/' + action + '?' + urlencode(data)
if timeout is None:
@@ -181,7 +181,7 @@
return data
def search(self, query):
if isinstance(query, basestring):
if isinstance(query, str):
query = {
'query': query
}

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import re
import stdnum.isbn
@@ -10,6 +10,8 @@ def normalize_isbn(value):
return ''.join([s for s in value if s.isdigit() or s == 'X'])
def find_isbns(text):
if isinstance(text, bytes):
text = text.decode()
matches = re.compile('\d[\d\-X\ ]+').findall(text)
matches = [normalize_isbn(value) for value in matches]
return [isbn for isbn in matches if stdnum.isbn.is_valid(isbn)

View file

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# vi:si:et:sw=4:sts=4:ts=4
from __future__ import division
import re
import hashlib
@@ -99,7 +99,7 @@ def lookup(id):
if m:
data['date'] = m[0]
logger.debug('lookup %s => %s', id, data.keys())
logger.debug('lookup %s => %s', id, list(data.keys()))
return data
info = lookup