find
This commit is contained in:
parent
a9c5fb43fe
commit
e41942ea99
28 changed files with 240 additions and 84 deletions
|
|
@ -7,6 +7,7 @@ import loc
|
|||
import lookupbyisbn
|
||||
import openlibrary
|
||||
import worldcat
|
||||
import google
|
||||
|
||||
providers = [
|
||||
('openlibrary', 'olid'),
|
||||
|
|
@ -17,9 +18,12 @@ providers = [
|
|||
]
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
    """Find candidate records for a book.

    Delegates to ``google.find`` with the same keyword arguments and
    returns its result unchanged.

    Args:
        title: Title to search for.
        author: Optional author, forwarded as-is.
        publisher: Optional publisher, forwarded as-is.
        date: Optional date, forwarded as-is.

    Returns:
        Whatever ``google.find`` returns for these arguments.
    """
    # NOTE: the openlibrary-based variant (openlibrary.find with the same
    # keyword arguments, tagging every result with r['mainid'] = 'olid')
    # is currently disabled; google.find is the only provider used here.
    return google.find(title=title, author=author, publisher=publisher, date=date)
|
||||
|
||||
def lookup(key, value):
|
||||
|
|
|
|||
38
oml/meta/google.py
Normal file
38
oml/meta/google.py
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# vi:si:et:sw=4:sts=4:ts=4
|
||||
from __future__ import division
|
||||
|
||||
import ox.web.google
|
||||
import stdnum.isbn
|
||||
|
||||
from .utils import find_isbns
|
||||
|
||||
|
||||
def find(title, author=None, publisher=None, date=None):
    """Search Google for ISBNs matching a title and optional author.

    The query is the title, then the author(s) if given, then the literal
    word ``isbn``. The text of every search hit is scanned with
    ``find_isbns`` and each distinct book is reported once.

    Args:
        title: Title to search for (string).
        author: Optional author; a string, or a list of strings that is
            joined with spaces.
        publisher: Currently unused (only included in the debug log).
        date: Currently unused (only included in the debug log).

    Returns:
        A list of dicts in order of first appearance, each of the form
        ``{'isbn13': <isbn>, 'mainid': 'isbn13'}`` (or ``'isbn10'`` when
        the ISBN found has 10 characters). No further lookup is done on
        the ISBNs.
    """
    # Local import keeps this block self-contained; debug tracing goes to
    # the logging system instead of stdout.
    import logging
    logging.getLogger(__name__).debug(
        'google.find %r %r %r %r', title, author, publisher, date)

    query = title
    if author:
        if isinstance(author, list):
            author = ' '.join(author)
        query += ' ' + author
    query += ' isbn'

    isbns = []
    for hit in ox.web.google.find(query):
        # presumably each hit is a sequence of strings -- verify against
        # ox.web.google.find
        isbns += find_isbns(' '.join(hit))

    results = []
    seen = set()
    for isbn in isbns:
        # De-duplicate on the ISBN-13 form so the ISBN-10 and ISBN-13 of
        # the same book collapse into one result regardless of which one
        # appears first.
        if len(isbn) == 10:
            canonical = stdnum.isbn.to_isbn13(isbn)
        else:
            canonical = isbn
        if canonical in seen:
            continue
        seen.add(canonical)
        key = 'isbn%d' % len(isbn)
        results.append({
            key: isbn,
            'mainid': key,
        })
    return results
|
||||
|
|
@ -33,7 +33,10 @@ def lookup(id):
|
|||
info = {
|
||||
'lccn': id
|
||||
}
|
||||
info['title'] = ''.join([e.text for e in mods.findall(ns + 'titleInfo')[0]])
|
||||
title = mods.findall(ns + 'titleInfo')
|
||||
if not title:
|
||||
return {}
|
||||
info['title'] = ''.join([e.text for e in title[0]])
|
||||
origin = mods.findall(ns + 'originInfo')
|
||||
if origin:
|
||||
info['place'] = []
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ def get_ids(key, value):
|
|||
if m:
|
||||
asin = m[0].split('/')[-3]
|
||||
ids.append(('asin', asin))
|
||||
if key == 'isbn10':
|
||||
ids.append(('isbn13', stdnum.isbn.to_isbn13(value)))
|
||||
if key == 'asin':
|
||||
if stdnum.isbn.is_valid(value):
|
||||
ids.append(('isbn10', value))
|
||||
|
|
@ -47,14 +49,16 @@ def lookup(id):
|
|||
r[key] = int(r[key])
|
||||
desc = find_re(data, '<h2>Description:<\/h2>(.*?)<div ')
|
||||
desc = desc.replace('<br /><br />', ' ').replace('<br /> ', ' ').replace('<br />', ' ')
|
||||
r['description'] = desc
|
||||
if r['description'] == u'Description of this item is not available at this time.':
|
||||
r['description'] = ''
|
||||
r['description'] = decode_html(strip_tags(desc))
|
||||
r['cover'] = find_re(data, '<img src="(.*?)" alt="Book cover').replace('._SL160_', '')
|
||||
for key in r:
|
||||
if isinstance(r[key], basestring):
|
||||
r[key] = decode_html(strip_tags(r[key])).strip()
|
||||
if 'author' in r and isinstance(r['author'], basestring):
|
||||
if 'author' in r and isinstance(r['author'], basestring) and r['author']:
|
||||
r['author'] = [r['author']]
|
||||
else:
|
||||
r['author'] = []
|
||||
if r['description'].lower() == u'Description of this item is not available at this time.'.lower():
|
||||
r['description'] = ''
|
||||
return r
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,16 @@
|
|||
|
||||
import re
|
||||
import stdnum.isbn
|
||||
|
||||
def normalize_isbn(value):
    """Strip an ISBN-like string down to its significant characters.

    Keeps only the ASCII digits ``0``-``9`` and the check character ``X``;
    hyphens, spaces and any other characters are removed. A lowercase
    ``x`` is accepted and returned as ``X``.

    Args:
        value: String that may contain an ISBN with separators.

    Returns:
        The string reduced to ASCII digits and ``X``. It is not validated.
    """
    # Membership in an explicit ASCII set (rather than str.isdigit) keeps
    # characters such as superscript or non-Latin digits out of the result.
    return ''.join(ch for ch in value.upper() if ch in '0123456789X')
|
||||
|
||||
# Candidate runs: a digit followed by one or more digits, hyphens, 'X' or
# spaces. Raw string so the backslashes reach the regex engine untouched.
_ISBN_CANDIDATE_RE = re.compile(r'\d[\d\-X\ ]+')


def find_isbns(text):
    """Extract valid ISBN-10 / ISBN-13 numbers from free text.

    Every candidate run matched in ``text`` is normalized with
    ``normalize_isbn`` and kept only if ``stdnum.isbn.is_valid`` accepts
    it, it is 10 or 13 characters long, and it is not all zeros.

    Args:
        text: String to scan.

    Returns:
        A list of normalized ISBN strings in order of appearance.
        Duplicates are not removed.
    """
    # NOTE(review): spaces are part of a candidate run, so two ISBNs
    # separated only by spaces are matched as one run and rejected.
    all_zero = ('0' * 10, '0' * 13)
    candidates = [
        normalize_isbn(match) for match in _ISBN_CANDIDATE_RE.findall(text)
    ]
    return [
        isbn for isbn in candidates
        if stdnum.isbn.is_valid(isbn)
        and len(isbn) in (10, 13)
        and isbn not in all_zero
    ]
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue